diff --git a/builtin-functions/kphp-light/stdlib/server-functions.txt b/builtin-functions/kphp-light/stdlib/server-functions.txt index 3a348d9f4e..0e46d2a141 100644 --- a/builtin-functions/kphp-light/stdlib/server-functions.txt +++ b/builtin-functions/kphp-light/stdlib/server-functions.txt @@ -69,6 +69,8 @@ function setlocale ($category ::: int, $locale ::: string) ::: string | false; function memory_get_detailed_stats() ::: int[]; +function prepare_search_query ($query ::: string) ::: string; + function memory_get_total_usage() ::: int; function inet_pton ($address ::: string) ::: string | false; @@ -131,7 +133,3 @@ function flush() ::: void; define('PHP_QUERY_RFC1738', 1); define('PHP_QUERY_RFC3986', 2); - -/** @kphp-extern-func-info stub generation-required */ -function prepare_search_query ($query ::: string) ::: string; - diff --git a/common/unicode/unicode-utils.cpp b/common/unicode/unicode-utils.cpp index 646997ab86..88d535eee0 100644 --- a/common/unicode/unicode-utils.cpp +++ b/common/unicode/unicode-utils.cpp @@ -4,7 +4,10 @@ #include "common/unicode/unicode-utils.h" +#include #include +#include +#include #include #include @@ -13,7 +16,7 @@ #include "common/unicode/utf8-utils.h" /* Search generated ranges for specified character */ -static int binary_search_ranges(const int* ranges, int r, int code) { +static int binary_search_ranges(const int* ranges, int r, int code, void (*assertf)(bool)) { if ((unsigned int)code > 0x10ffff) { return 0; } @@ -43,152 +46,158 @@ static int binary_search_ranges(const int* ranges, int r, int code) { case 2: return ((code - 1) | 1); default: - assert(0); - exit(1); + if (assertf != nullptr) { + assertf(false); + } } + return 0; } /* Convert character to upper case */ -int unicode_toupper(int code) { +int unicode_toupper(int code, void (*assertf)(bool)) { if ((unsigned int)code < (unsigned int)TABLE_SIZE) { return to_upper_table[code]; } else { - return binary_search_ranges(to_upper_table_ranges, to_upper_table_ranges_size, code); + return binary_search_ranges(to_upper_table_ranges, to_upper_table_ranges_size, code, assertf); } } /* Convert character to lower case */ -int unicode_tolower(int code) { +int unicode_tolower(int code, void (*assertf)(bool)) { if ((unsigned int)code < (unsigned int)TABLE_SIZE) { return to_lower_table[code]; } else { - return binary_search_ranges(to_lower_table_ranges, to_lower_table_ranges_size, code); + return binary_search_ranges(to_lower_table_ranges, to_lower_table_ranges_size, code, assertf); } } +inline constexpr int32_t WHITESPACE_CODE_POINT{static_cast(' ')}; +inline constexpr int32_t PLUS_CODE_POINT{static_cast('+')}; + /* Prepares unicode 0-terminated string input for search, leaving only digits and letters with diacritics. Length of string can decrease. Returns length of result. */ -int prepare_search_string(int* input) { - int i; - int* output = input; - for (i = 0; input[i]; i++) { - int c = input[i], new_c; - if ((unsigned int)c < (unsigned int)TABLE_SIZE) { - new_c = prepare_table[c]; +size_t prepare_search_string(int32_t* code_points, void (*assertf)(bool)) noexcept { + size_t output_size{}; + for (size_t i{}; code_points[i] != 0; ++i) { + int32_t c{code_points[i]}; + int32_t new_c{}; + if (static_cast(c) < static_cast(TABLE_SIZE)) { + new_c = static_cast(prepare_table[c]); } else { - new_c = binary_search_ranges(prepare_table_ranges, prepare_table_ranges_size, c); + new_c = binary_search_ranges(prepare_table_ranges, prepare_table_ranges_size, c, assertf); } - if (new_c) { - if (new_c != 0x20 || (output > input && output[-1] != 0x20)) { - *output++ = new_c; + if (new_c != 0) { + // we forbid 2 whitespaces after each other and starting whitespace + if (new_c != WHITESPACE_CODE_POINT || (output_size > 0 && code_points[output_size - 1] != WHITESPACE_CODE_POINT)) { + code_points[output_size++] = new_c; } } } - if (output > input && output[-1] == 0x20) { - output--; + if (output_size > 0 && code_points[output_size - 1] == WHITESPACE_CODE_POINT) { + // throw out terminating whitespace + --output_size; } - *output = 0; - return output - input; -} - -#define MAX_NAME_SIZE 65536 -static char prep_buf[4 * MAX_NAME_SIZE + 4]; -int prep_ibuf[MAX_NAME_SIZE + 4]; -static int prep_ibuf_res[MAX_NAME_SIZE + 4]; -static int* words_ibuf[MAX_NAME_SIZE + 4]; - -int stricmp_void(const void* x, const void* y) { - const int* s1 = *(const int**)x; - const int* s2 = *(const int**)y; - while (*s1 == *s2 && *s1 != ' ') - s1++, s2++; - return *s1 - *s2; + code_points[output_size] = 0; + return output_size; } -int* prepare_str_unicode(const int* x) { - int* v = prep_ibuf; - - int n; - if (v != x) { - for (n = 0; x[n]; n++) { - v[n] = x[n]; +inline size_t prepare_str_unicode(int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, void (*assertf)(bool)) noexcept { + size_t code_points_length = prepare_search_string(code_points, assertf); + code_points[code_points_length] = WHITESPACE_CODE_POINT; + + size_t words_count{}; + size_t i{}; + // looking for the beginnings of the words + while (i < code_points_length) { + word_start_indices[words_count++] = i; + while (i < code_points_length && code_points[i] != WHITESPACE_CODE_POINT) { + ++i; } - v[n] = 0; + ++i; } - n = prepare_search_string(v); - v[n] = ' '; - - int i = 0, k = 0; - while (i < n) { - words_ibuf[k++] = v + i; - while (v[i] && v[i] != ' ') { - i++; + auto word_less_cmp{[&code_points](size_t x, size_t y) noexcept -> bool { + while (code_points[x] != WHITESPACE_CODE_POINT && code_points[x] == code_points[y]) { + ++x; + ++y; } - i++; - } + if (code_points[x] == WHITESPACE_CODE_POINT) { + return code_points[y] != WHITESPACE_CODE_POINT; + } + if (code_points[y] == WHITESPACE_CODE_POINT) { + return false; + } + return code_points[x] < code_points[y]; + }}; - qsort(words_ibuf, (size_t)k, sizeof(int*), stricmp_void); + std::sort(word_start_indices, std::next(word_start_indices, words_count), word_less_cmp); - int j = 0; - for (i = 0; i < k; i++) { - if (j == 0 || stricmp_void(&words_ibuf[j - 1], &words_ibuf[i])) { - words_ibuf[j++] = words_ibuf[i]; + size_t uniq_words_count{}; + for (i = 0; i < words_count; ++i) { + // drop duplicates + if (uniq_words_count == 0 || word_less_cmp(word_start_indices[uniq_words_count - 1], word_start_indices[i])) { + word_start_indices[uniq_words_count++] = word_start_indices[i]; } else { - words_ibuf[j - 1] = words_ibuf[i]; + word_start_indices[uniq_words_count - 1] = word_start_indices[i]; } } - k = j; - int* res = prep_ibuf_res; - for (i = 0; i < k; i++) { - int* tmp = words_ibuf[i]; - while (*tmp != ' ') { - *res++ = *tmp++; + size_t result_size{}; + // output words with '+' separator + for (i = 0; i < uniq_words_count; ++i) { + size_t ind{word_start_indices[i]}; + while (code_points[ind] != WHITESPACE_CODE_POINT) { + prepared_code_points[result_size++] = code_points[ind++]; } - *res++ = '+'; + prepared_code_points[result_size++] = PLUS_CODE_POINT; } - *res++ = 0; + prepared_code_points[result_size++] = 0; - assert(res - prep_ibuf_res < MAX_NAME_SIZE); - return prep_ibuf_res; + assertf(result_size < MAX_NAME_SIZE); + return result_size; } -const char* clean_str_unicode(const int* xx) { - assert(xx != NULL); - - int* v = prepare_str_unicode(xx); - int l = put_string_utf8(v, prep_buf); - assert(l < sizeof(prep_buf)); - - char *s = prep_buf, *x = prep_buf; - int skip; - - while (*x != 0) { - skip = !strncmp(x, "amp+", 4) || !strncmp(x, "gt+", 3) || !strncmp(x, "lt+", 3) || !strncmp(x, "quot+", 5) || !strncmp(x, "ft+", 3) || - !strncmp(x, "feat+", 5) || - (((x[0] == '1' && x[1] == '9') || (x[0] == '2' && x[1] == '0')) && ('0' <= x[2] && x[2] <= '9') && ('0' <= x[3] && x[3] <= '9') && x[4] == '+') || - !strncmp(x, "092+", 4) || !strncmp(x, "33+", 3) || !strncmp(x, "34+", 3) || !strncmp(x, "36+", 3) || !strncmp(x, "39+", 3) || - !strncmp(x, "60+", 3) || !strncmp(x, "62+", 3) || !strncmp(x, "8232+", 5) || !strncmp(x, "8233+", 5); +inline size_t clean_str_unicode(int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result, + void (*assertf)(bool)) noexcept { + prepare_str_unicode(code_points, word_start_indices, prepared_code_points, assertf); + + auto length{static_cast(put_string_utf8(prepared_code_points, reinterpret_cast(utf8_result)))}; + assertf(length < MAX_NAME_BYTES_SIZE); + + size_t i{}; + size_t result_size{}; + while (i < length) { + char* c{reinterpret_cast(std::addressof(utf8_result[i]))}; + bool skip{!strncmp(c, "amp+", 4) || !strncmp(c, "gt+", 3) || !strncmp(c, "lt+", 3) || !strncmp(c, "quot+", 5) || !strncmp(c, "ft+", 3) || + !strncmp(c, "feat+", 5) || + (((c[0] == '1' && c[1] == '9') || (c[0] == '2' && c[1] == '0')) && ('0' <= c[2] && c[2] <= '9') && ('0' <= c[3] && c[3] <= '9') && c[4] == '+') || + !strncmp(c, "092+", 4) || !strncmp(c, "33+", 3) || !strncmp(c, "34+", 3) || !strncmp(c, "36+", 3) || !strncmp(c, "39+", 3) || + !strncmp(c, "60+", 3) || !strncmp(c, "62+", 3) || !strncmp(c, "8232+", 5) || !strncmp(c, "8233+", 5)}; do { - *s = *x; if (!skip) { - s++; + utf8_result[result_size] = utf8_result[i]; + ++result_size; } - } while (*x++ != '+'); + } while (utf8_result[i++] != static_cast('+')); } - *s = 0; + utf8_result[result_size] = static_cast(0); - return prep_buf; + return result_size; } -const char* clean_str(const char* x) { - if (x == NULL || strlen(x) >= MAX_NAME_SIZE) { - return x; +size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result, + void (*assertf)(bool)) { + size_t x_len{strlen(x)}; + if (assertf == nullptr || x == NULL || x_len >= MAX_NAME_SIZE) { + for (size_t i = 0; i < x_len; ++i) { + utf8_result[i] = static_cast(x[i]); + } + utf8_result[x_len] = static_cast(0); + return x_len; } - html_string_to_utf8(x, prep_ibuf); - return clean_str_unicode(prep_ibuf); + html_string_to_utf8(x, code_points); + return clean_str_unicode(code_points, word_start_indices, prepared_code_points, utf8_result, assertf); } diff --git a/common/unicode/unicode-utils.h b/common/unicode/unicode-utils.h index fbbbe516b5..50c59af432 100644 --- a/common/unicode/unicode-utils.h +++ b/common/unicode/unicode-utils.h @@ -4,6 +4,13 @@ #pragma once -int unicode_toupper(int code); -int unicode_tolower(int code); -const char* clean_str(const char* x); +#include +#include + +inline constexpr size_t MAX_NAME_SIZE = 65536; +inline constexpr size_t MAX_NAME_BYTES_SIZE = 4 * MAX_NAME_SIZE + 4; +inline constexpr size_t MAX_NAME_CODE_POINTS_SIZE = MAX_NAME_SIZE + 4; + +int unicode_toupper(int code, void (*assertf)(bool)); +int unicode_tolower(int code, void (*assertf)(bool)); +size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result, void (*assertf)(bool)); diff --git a/common/unicode/utf8-utils.cpp b/common/unicode/utf8-utils.cpp index fbf65bee80..ad0dfb39b1 100644 --- a/common/unicode/utf8-utils.cpp +++ b/common/unicode/utf8-utils.cpp @@ -4,7 +4,7 @@ #include "common/unicode/utf8-utils.h" -#include +#include #include #include @@ -991,164 +991,164 @@ int simplify_character(int c) { } } -const int _s_1__[] = {97, 0}; -const int _v_1__[] = {1072, 0}; -const int _s_2__[] = {98, 0}; -const int _v_2__[] = {1073, 0}; -const int _s_3__[] = {99, 0}; -const int _v_3__[] = {1082, 0}; -const int _s_4__[] = {99, 104, 0}; -const int _v_4__[] = {1095, 0}; -const int _s_5__[] = {100, 0}; -const int _v_5__[] = {1076, 0}; -const int _s_6__[] = {101, 0}; -const int _v_6__[] = {1077, 0}; -const int _s_7__[] = {101, 105, 0}; -const int _v_7__[] = {1077, 1081, 0}; -const int _s_8__[] = {101, 121, 0}; -const int _v_8__[] = {1077, 1081, 0}; -const int _s_9__[] = {102, 0}; -const int _v_9__[] = {1092, 0}; -const int _s_10__[] = {103, 0}; -const int _v_10__[] = {1075, 0}; -const int _s_11__[] = {104, 0}; -const int _v_11__[] = {1093, 0}; -const int _s_12__[] = {105, 0}; -const int _v_12__[] = {1080, 0}; -const int _s_13__[] = {105, 97, 0}; -const int _v_13__[] = {1080, 1103, 0}; -const int _s_14__[] = {105, 121, 0}; -const int _v_14__[] = {1080, 1081, 0}; -const int _s_15__[] = {106, 0}; -const int _v_15__[] = {1081, 0}; -const int _s_16__[] = {106, 111, 0}; -const int _v_16__[] = {1077, 0}; -const int _s_17__[] = {106, 117, 0}; -const int _v_17__[] = {1102, 0}; -const int _s_18__[] = {106, 97, 0}; -const int _v_18__[] = {1103, 0}; -const int _s_19__[] = {107, 0}; -const int _v_19__[] = {1082, 0}; -const int _s_20__[] = {107, 104, 0}; -const int _v_20__[] = {1093, 0}; -const int _s_21__[] = {108, 0}; -const int _v_21__[] = {1083, 0}; -const int _s_22__[] = {109, 0}; -const int _v_22__[] = {1084, 0}; -const int _s_23__[] = {110, 0}; -const int _v_23__[] = {1085, 0}; -const int _s_24__[] = {111, 0}; -const int _v_24__[] = {1086, 0}; -const int _s_25__[] = {112, 0}; -const int _v_25__[] = {1087, 0}; -const int _s_26__[] = {113, 0}; -const int _v_26__[] = {1082, 0}; -const int _s_27__[] = {114, 0}; -const int _v_27__[] = {1088, 0}; -const int _s_28__[] = {115, 0}; -const int _v_28__[] = {1089, 0}; -const int _s_29__[] = {115, 104, 0}; -const int _v_29__[] = {1096, 0}; -const int _s_30__[] = {115, 104, 99, 104, 0}; -const int _v_30__[] = {1097, 0}; -const int _s_31__[] = {115, 99, 104, 0}; -const int _v_31__[] = {1097, 0}; -const int _s_32__[] = {116, 0}; -const int _v_32__[] = {1090, 0}; -const int _s_33__[] = {116, 115, 0}; -const int _v_33__[] = {1094, 0}; -const int _s_34__[] = {117, 0}; -const int _v_34__[] = {1091, 0}; -const int _s_35__[] = {118, 0}; -const int _v_35__[] = {1074, 0}; -const int _s_36__[] = {119, 0}; -const int _v_36__[] = {1074, 0}; -const int _s_37__[] = {120, 0}; -const int _v_37__[] = {1082, 1089, 0}; -const int _s_38__[] = {121, 0}; -const int _v_38__[] = {1080, 0}; -const int _s_39__[] = {121, 111, 0}; -const int _v_39__[] = {1077, 0}; -const int _s_40__[] = {121, 117, 0}; -const int _v_40__[] = {1102, 0}; -const int _s_41__[] = {121, 97, 0}; -const int _v_41__[] = {1103, 0}; -const int _s_42__[] = {122, 0}; -const int _v_42__[] = {1079, 0}; -const int _s_43__[] = {122, 104, 0}; -const int _v_43__[] = {1078, 0}; -const int _s_44__[] = {1072, 0}; -const int _v_44__[] = {97, 0}; -const int _s_45__[] = {1073, 0}; -const int _v_45__[] = {98, 0}; -const int _s_46__[] = {1074, 0}; -const int _v_46__[] = {118, 0}; -const int _s_47__[] = {1075, 0}; -const int _v_47__[] = {103, 0}; -const int _s_48__[] = {1076, 0}; -const int _v_48__[] = {100, 0}; -const int _s_49__[] = {1077, 0}; -const int _v_49__[] = {101, 0}; -const int _s_50__[] = {1105, 0}; -const int _v_50__[] = {101, 0}; -const int _s_51__[] = {1078, 0}; -const int _v_51__[] = {122, 104, 0}; -const int _s_52__[] = {1079, 0}; -const int _v_52__[] = {122, 0}; -const int _s_53__[] = {1080, 0}; -const int _v_53__[] = {105, 0}; -const int _s_54__[] = {1080, 1081, 0}; -const int _v_54__[] = {121, 0}; -const int _s_55__[] = {1080, 1103, 0}; -const int _v_55__[] = {105, 97, 0}; -const int _s_56__[] = {1081, 0}; -const int _v_56__[] = {121, 0}; -const int _s_57__[] = {1082, 0}; -const int _v_57__[] = {107, 0}; -const int _s_58__[] = {1082, 1089, 0}; -const int _v_58__[] = {120, 0}; -const int _s_59__[] = {1083, 0}; -const int _v_59__[] = {108, 0}; -const int _s_60__[] = {1084, 0}; -const int _v_60__[] = {109, 0}; -const int _s_61__[] = {1085, 0}; -const int _v_61__[] = {110, 0}; -const int _s_62__[] = {1086, 0}; -const int _v_62__[] = {111, 0}; -const int _s_63__[] = {1087, 0}; -const int _v_63__[] = {112, 0}; -const int _s_64__[] = {1088, 0}; -const int _v_64__[] = {114, 0}; -const int _s_65__[] = {1089, 0}; -const int _v_65__[] = {115, 0}; -const int _s_66__[] = {1090, 0}; -const int _v_66__[] = {116, 0}; -const int _s_67__[] = {1091, 0}; -const int _v_67__[] = {117, 0}; -const int _s_68__[] = {1092, 0}; -const int _v_68__[] = {102, 0}; -const int _s_69__[] = {1093, 0}; -const int _v_69__[] = {107, 104, 0}; -const int _s_70__[] = {1094, 0}; -const int _v_70__[] = {116, 115, 0}; -const int _s_71__[] = {1095, 0}; -const int _v_71__[] = {99, 104, 0}; -const int _s_72__[] = {1096, 0}; -const int _v_72__[] = {115, 104, 0}; -const int _s_73__[] = {1097, 0}; -const int _v_73__[] = {115, 104, 99, 104, 0}; -const int _s_74__[] = {1098, 0}; -const int _v_74__[] = {0}; -const int _s_75__[] = {1099, 0}; -const int _v_75__[] = {121, 0}; -const int _s_76__[] = {1100, 0}; -const int _v_76__[] = {0}; -const int _s_77__[] = {1101, 0}; -const int _v_77__[] = {101, 0}; -const int _s_78__[] = {1102, 0}; -const int _v_78__[] = {121, 117, 0}; -const int _s_79__[] = {1103, 0}; -const int _v_79__[] = {121, 97, 0}; +constexpr std::array _s_1__{97, 0}; +constexpr std::array _v_1__{1072, 0}; +constexpr std::array _s_2__{98, 0}; +constexpr std::array _v_2__{1073, 0}; +constexpr std::array _s_3__{99, 0}; +constexpr std::array _v_3__{1082, 0}; +constexpr std::array _s_4__{99, 104, 0}; +constexpr std::array _v_4__{1095, 0}; +constexpr std::array _s_5__{100, 0}; +constexpr std::array _v_5__{1076, 0}; +constexpr std::array _s_6__{101, 0}; +constexpr std::array _v_6__{1077, 0}; +constexpr std::array _s_7__{101, 105, 0}; +constexpr std::array _v_7__{1077, 1081, 0}; +constexpr std::array _s_8__{101, 121, 0}; +constexpr std::array _v_8__{1077, 1081, 0}; +constexpr std::array _s_9__{102, 0}; +constexpr std::array _v_9__{1092, 0}; +constexpr std::array _s_10__{103, 0}; +constexpr std::array _v_10__{1075, 0}; +constexpr std::array _s_11__{104, 0}; +constexpr std::array _v_11__{1093, 0}; +constexpr std::array _s_12__{105, 0}; +constexpr std::array _v_12__{1080, 0}; +constexpr std::array _s_13__{105, 97, 0}; +constexpr std::array _v_13__{1080, 1103, 0}; +constexpr std::array _s_14__{105, 121, 0}; +constexpr std::array _v_14__{1080, 1081, 0}; +constexpr std::array _s_15__{106, 0}; +constexpr std::array _v_15__{1081, 0}; +constexpr std::array _s_16__{106, 111, 0}; +constexpr std::array _v_16__{1077, 0}; +constexpr std::array _s_17__{106, 117, 0}; +constexpr std::array _v_17__{1102, 0}; +constexpr std::array _s_18__{106, 97, 0}; +constexpr std::array _v_18__{1103, 0}; +constexpr std::array _s_19__{107, 0}; +constexpr std::array _v_19__{1082, 0}; +constexpr std::array _s_20__{107, 104, 0}; +constexpr std::array _v_20__{1093, 0}; +constexpr std::array _s_21__{108, 0}; +constexpr std::array _v_21__{1083, 0}; +constexpr std::array _s_22__{109, 0}; +constexpr std::array _v_22__{1084, 0}; +constexpr std::array _s_23__{110, 0}; +constexpr std::array _v_23__{1085, 0}; +constexpr std::array _s_24__{111, 0}; +constexpr std::array _v_24__{1086, 0}; +constexpr std::array _s_25__{112, 0}; +constexpr std::array _v_25__{1087, 0}; +constexpr std::array _s_26__{113, 0}; +constexpr std::array _v_26__{1082, 0}; +constexpr std::array _s_27__{114, 0}; +constexpr std::array _v_27__{1088, 0}; +constexpr std::array _s_28__{115, 0}; +constexpr std::array _v_28__{1089, 0}; +constexpr std::array _s_29__{115, 104, 0}; +constexpr std::array _v_29__{1096, 0}; +constexpr std::array _s_30__{115, 104, 99, 104, 0}; +constexpr std::array _v_30__{1097, 0}; +constexpr std::array _s_31__{115, 99, 104, 0}; +constexpr std::array _v_31__{1097, 0}; +constexpr std::array _s_32__{116, 0}; +constexpr std::array _v_32__{1090, 0}; +constexpr std::array _s_33__{116, 115, 0}; +constexpr std::array _v_33__{1094, 0}; +constexpr std::array _s_34__{117, 0}; +constexpr std::array _v_34__{1091, 0}; +constexpr std::array _s_35__{118, 0}; +constexpr std::array _v_35__{1074, 0}; +constexpr std::array _s_36__{119, 0}; +constexpr std::array _v_36__{1074, 0}; +constexpr std::array _s_37__{120, 0}; +constexpr std::array _v_37__{1082, 1089, 0}; +constexpr std::array _s_38__{121, 0}; +constexpr std::array _v_38__{1080, 0}; +constexpr std::array _s_39__{121, 111, 0}; +constexpr std::array _v_39__{1077, 0}; +constexpr std::array _s_40__{121, 117, 0}; +constexpr std::array _v_40__{1102, 0}; +constexpr std::array _s_41__{121, 97, 0}; +constexpr std::array _v_41__{1103, 0}; +constexpr std::array _s_42__{122, 0}; +constexpr std::array _v_42__{1079, 0}; +constexpr std::array _s_43__{122, 104, 0}; +constexpr std::array _v_43__{1078, 0}; +constexpr std::array _s_44__{1072, 0}; +constexpr std::array _v_44__{97, 0}; +constexpr std::array _s_45__{1073, 0}; +constexpr std::array _v_45__{98, 0}; +constexpr std::array _s_46__{1074, 0}; +constexpr std::array _v_46__{118, 0}; +constexpr std::array _s_47__{1075, 0}; +constexpr std::array _v_47__{103, 0}; +constexpr std::array _s_48__{1076, 0}; +constexpr std::array _v_48__{100, 0}; +constexpr std::array _s_49__{1077, 0}; +constexpr std::array _v_49__{101, 0}; +constexpr std::array _s_50__{1105, 0}; +constexpr std::array _v_50__{101, 0}; +constexpr std::array _s_51__{1078, 0}; +constexpr std::array _v_51__{122, 104, 0}; +constexpr std::array _s_52__{1079, 0}; +constexpr std::array _v_52__{122, 0}; +constexpr std::array _s_53__{1080, 0}; +constexpr std::array _v_53__{105, 0}; +constexpr std::array _s_54__{1080, 1081, 0}; +constexpr std::array _v_54__{121, 0}; +constexpr std::array _s_55__{1080, 1103, 0}; +constexpr std::array _v_55__{105, 97, 0}; +constexpr std::array _s_56__{1081, 0}; +constexpr std::array _v_56__{121, 0}; +constexpr std::array _s_57__{1082, 0}; +constexpr std::array _v_57__{107, 0}; +constexpr std::array _s_58__{1082, 1089, 0}; +constexpr std::array _v_58__{120, 0}; +constexpr std::array _s_59__{1083, 0}; +constexpr std::array _v_59__{108, 0}; +constexpr std::array _s_60__{1084, 0}; +constexpr std::array _v_60__{109, 0}; +constexpr std::array _s_61__{1085, 0}; +constexpr std::array _v_61__{110, 0}; +constexpr std::array _s_62__{1086, 0}; +constexpr std::array _v_62__{111, 0}; +constexpr std::array _s_63__{1087, 0}; +constexpr std::array _v_63__{112, 0}; +constexpr std::array _s_64__{1088, 0}; +constexpr std::array _v_64__{114, 0}; +constexpr std::array _s_65__{1089, 0}; +constexpr std::array _v_65__{115, 0}; +constexpr std::array _s_66__{1090, 0}; +constexpr std::array _v_66__{116, 0}; +constexpr std::array _s_67__{1091, 0}; +constexpr std::array _v_67__{117, 0}; +constexpr std::array _s_68__{1092, 0}; +constexpr std::array _v_68__{102, 0}; +constexpr std::array _s_69__{1093, 0}; +constexpr std::array _v_69__{107, 104, 0}; +constexpr std::array _s_70__{1094, 0}; +constexpr std::array _v_70__{116, 115, 0}; +constexpr std::array _s_71__{1095, 0}; +constexpr std::array _v_71__{99, 104, 0}; +constexpr std::array _s_72__{1096, 0}; +constexpr std::array _v_72__{115, 104, 0}; +constexpr std::array _s_73__{1097, 0}; +constexpr std::array _v_73__{115, 104, 99, 104, 0}; +constexpr std::array _s_74__{1098, 0}; +constexpr std::array _v_74__{0}; +constexpr std::array _s_75__{1099, 0}; +constexpr std::array _v_75__{121, 0}; +constexpr std::array _s_76__{1100, 0}; +constexpr std::array _v_76__{0}; +constexpr std::array _s_77__{1101, 0}; +constexpr std::array _v_77__{101, 0}; +constexpr std::array _s_78__{1102, 0}; +constexpr std::array _v_78__{121, 117, 0}; +constexpr std::array _s_79__{1103, 0}; +constexpr std::array _v_79__{121, 97, 0}; int translit_string_utf8_from_en_to_ru(int* input, int* output) { @@ -1158,8 +1158,8 @@ int translit_string_utf8_from_en_to_ru(int* input, int* output) { k++; \ } \ if (!s[k]) { \ - match_v = v; \ - match_s = s; \ + match_v = v.data(); \ + match_s = s.data(); \ } int i = 0, j = 0, k = 0; @@ -1340,8 +1340,8 @@ int translit_string_utf8_from_ru_to_en(int* input, int* output) { k++; \ } \ if (!s[k]) { \ - match_v = v; \ - match_s = s; \ + match_v = v.data(); \ + match_s = s.data(); \ } int i = 0, j = 0, k = 0; diff --git a/runtime-common/stdlib/string/mbstring-functions.cpp b/runtime-common/stdlib/string/mbstring-functions.cpp index ef84817f03..6faf0566db 100644 --- a/runtime-common/stdlib/string/mbstring-functions.cpp +++ b/runtime-common/stdlib/string/mbstring-functions.cpp @@ -130,7 +130,7 @@ int64_t f$mb_strlen(const string& str, const string& encoding) noexcept { return mb_UTF8_strlen(str.c_str()); } -string f$mb_strtolower(const string& str, const string& encoding) noexcept { +string mb_strtolower_impl(const string& str, void (*assertf)(bool), const string& encoding) noexcept { int encoding_num = mb_detect_encoding(encoding); if (encoding_num < 0) { php_critical_error("encoding \"%s\" doesn't supported in mb_strtolower", encoding.c_str()); @@ -184,7 +184,7 @@ string f$mb_strtolower(const string& str, const string& encoding) noexcept { int ch = 0; while ((p = get_char_utf8(&ch, s)) > 0) { s += p; - res_len += put_char_utf8(unicode_tolower(ch), &res[res_len]); + res_len += put_char_utf8(unicode_tolower(ch, assertf), &res[res_len]); } if (p < 0) { php_warning("Incorrect UTF-8 string \"%s\" in function mb_strtolower", str.c_str()); @@ -195,7 +195,7 @@ string f$mb_strtolower(const string& str, const string& encoding) noexcept { } } -string f$mb_strtoupper(const string& str, const string& encoding) noexcept { +string mb_strtoupper_impl(const string& str, void (*assertf)(bool), const string& encoding) noexcept { int encoding_num = mb_detect_encoding(encoding); if (encoding_num < 0) { php_critical_error("encoding \"%s\" doesn't supported in mb_strtoupper", encoding.c_str()); @@ -254,7 +254,7 @@ string f$mb_strtoupper(const string& str, const string& encoding) noexcept { int ch = 0; while ((p = get_char_utf8(&ch, s)) > 0) { s += p; - res_len += put_char_utf8(unicode_toupper(ch), &res[res_len]); + res_len += put_char_utf8(unicode_toupper(ch, assertf), &res[res_len]); } if (p < 0) { php_warning("Incorrect UTF-8 string \"%s\" in function mb_strtoupper", str.c_str()); @@ -307,9 +307,9 @@ Optional f$mb_strpos(const string& haystack, const string& needle, int6 return false; } -Optional f$mb_stripos(const string& haystack, const string& needle, int64_t offset, const string& encoding) noexcept { +Optional mb_stripos_impl(const string& haystack, const string& needle, void (*assertf)(bool), int64_t offset, const string& encoding) noexcept { if (const int encoding_num = check_strpos_agrs("mb_stripos", needle, offset, encoding)) { - return mp_strpos_impl(f$mb_strtolower(haystack, encoding), f$mb_strtolower(needle, encoding), offset, encoding_num); + return mp_strpos_impl(mb_strtolower_impl(haystack, assertf, encoding), mb_strtolower_impl(needle, assertf, encoding), offset, encoding_num); } return false; } diff --git a/runtime-common/stdlib/string/mbstring-functions.h b/runtime-common/stdlib/string/mbstring-functions.h index 6d0432ac9b..3f1516d2f3 100644 --- a/runtime-common/stdlib/string/mbstring-functions.h +++ b/runtime-common/stdlib/string/mbstring-functions.h @@ -15,15 +15,15 @@ bool f$mb_check_encoding(const string& str, const string& encoding = StringLibCo int64_t f$mb_strlen(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept; -string f$mb_strtolower(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept; +string mb_strtolower_impl(const string& str, void (*assertf)(bool), const string& encoding = StringLibConstants::get().CP1251_STR) noexcept; -string f$mb_strtoupper(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept; +string mb_strtoupper_impl(const string& str, void (*assertf)(bool), const string& encoding = StringLibConstants::get().CP1251_STR) noexcept; Optional f$mb_strpos(const string& haystack, const string& needle, int64_t offset = 0, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept; -Optional f$mb_stripos(const string& haystack, const string& needle, int64_t offset = 0, - const string& encoding = StringLibConstants::get().CP1251_STR) noexcept; +Optional mb_stripos_impl(const string& haystack, const string& needle, void (*assertf)(bool), int64_t offset = 0, + const string& encoding = StringLibConstants::get().CP1251_STR) noexcept; string f$mb_substr(const string& str, int64_t start, const mixed& length = std::numeric_limits::max(), const string& encoding = StringLibConstants::get().CP1251_STR) noexcept; diff --git a/runtime-common/stdlib/string/string-functions.h b/runtime-common/stdlib/string/string-functions.h index d200a644b8..ce0784e8b6 100644 --- a/runtime-common/stdlib/string/string-functions.h +++ b/runtime-common/stdlib/string/string-functions.h @@ -11,6 +11,7 @@ #include #include +#include "common/unicode/unicode-utils.h" #include "runtime-common/core/runtime-core.h" #include "runtime-common/core/utils/kphp-assert-core.h" #include "runtime-common/stdlib/string/string-context.h" @@ -542,3 +543,34 @@ string str_concat(str_concat_arg s1, str_concat_arg s2) noexcept; string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3) noexcept; string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4) noexcept; string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4, str_concat_arg s5) noexcept; + +namespace prepare_search_query_impl_ { + +inline constexpr size_t SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE; +inline constexpr size_t WORD_INDICES_SPAN_SIZE_IN_BYTES = sizeof(size_t) * MAX_NAME_CODE_POINTS_SIZE; +inline constexpr size_t RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE; +inline constexpr size_t RESULT_BYTES_SPAN_SIZE_IN_BYTES = sizeof(std::byte) * MAX_NAME_BYTES_SIZE; + +static_assert(SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES + WORD_INDICES_SPAN_SIZE_IN_BYTES + RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES + + RESULT_BYTES_SPAN_SIZE_IN_BYTES < + StringLibContext::STATIC_BUFFER_LENGTH); + +inline constexpr size_t SOURCE_CODE_POINTS_SPAN_BEGIN = 0; +inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN + SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES; +inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + WORD_INDICES_SPAN_SIZE_IN_BYTES; +inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES; + +inline string prepare_search_query(const string& query, void (*assertf)(bool)) noexcept { + auto& string_lib_ctx{StringLibContext::get()}; + int32_t* code_points{reinterpret_cast(std::next(string_lib_ctx.static_buf.get(), prepare_search_query_impl_::SOURCE_CODE_POINTS_SPAN_BEGIN))}; + size_t* word_start_indices{reinterpret_cast(std::next(string_lib_ctx.static_buf.get(), prepare_search_query_impl_::WORD_INDICES_SPAN_BEGIN))}; + int32_t* prepared_code_points{ + reinterpret_cast(std::next(string_lib_ctx.static_buf.get(), prepare_search_query_impl_::RESULT_CODE_POINTS_SPAN_BEGIN))}; + std::byte* utf8_result{reinterpret_cast(std::next(string_lib_ctx.static_buf.get(), prepare_search_query_impl_::RESULT_BYTES_SPAN_BEGIN))}; + + size_t length{clean_str(query.c_str(), code_points, word_start_indices, prepared_code_points, utf8_result, assertf)}; + + return {reinterpret_cast(utf8_result), static_cast(length)}; +} + +} // namespace prepare_search_query_impl_ diff --git a/runtime-light/stdlib/string/mbstring-functions.h b/runtime-light/stdlib/string/mbstring-functions.h new file mode 100644 index 0000000000..5cddc4d954 --- /dev/null +++ b/runtime-light/stdlib/string/mbstring-functions.h @@ -0,0 +1,25 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2026 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include + +#include "runtime-common/core/runtime-core.h" +#include "runtime-common/stdlib/string/mbstring-functions.h" +#include "runtime-common/stdlib/string/string-context.h" +#include "runtime-light/stdlib/diagnostics/logs.h" + +inline string f$mb_strtolower(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept { + return mb_strtolower_impl(str, [](bool condition) noexcept { kphp::log::assertion(condition); }, encoding); +} + +inline string f$mb_strtoupper(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept { + return mb_strtoupper_impl(str, [](bool condition) noexcept { kphp::log::assertion(condition); }, encoding); +} + +inline Optional f$mb_stripos(const string& haystack, const string& needle, int64_t offset = 0, + const string& encoding = StringLibConstants::get().CP1251_STR) noexcept { + return mb_stripos_impl(haystack, needle, [](bool condition) noexcept { kphp::log::assertion(condition); }, offset, encoding); +} diff --git a/runtime-light/stdlib/string/string-functions.h b/runtime-light/stdlib/string/string-functions.h index 28b7ad35c6..aa6b147dde 100644 --- a/runtime-light/stdlib/string/string-functions.h +++ b/runtime-light/stdlib/string/string-functions.h @@ -7,7 +7,13 @@ #include #include "runtime-common/core/runtime-core.h" +#include "runtime-common/stdlib/string/string-functions.h" #include "runtime-light/k2-platform/k2-api.h" +#include "runtime-light/stdlib/diagnostics/logs.h" + +inline string f$prepare_search_query(const string& query) noexcept { + return prepare_search_query_impl_::prepare_search_query(query, [](bool condition) noexcept { kphp::log::assertion(condition); }); +} inline Optional f$setlocale(int64_t category, const string& locale) noexcept { const int32_t i32category{static_cast(category)}; diff --git a/runtime/mbstring-functions.h b/runtime/mbstring-functions.h new file mode 100644 index 0000000000..c61679b1cd --- /dev/null +++ b/runtime/mbstring-functions.h @@ -0,0 +1,25 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2026 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include +#include + +#include "runtime-common/core/runtime-core.h" +#include "runtime-common/stdlib/string/mbstring-functions.h" +#include "runtime-common/stdlib/string/string-context.h" + +inline string f$mb_strtolower(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept { + return mb_strtolower_impl(str, [](bool condition) noexcept { assert(condition); }, encoding); +} + +inline string f$mb_strtoupper(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept { + return mb_strtoupper_impl(str, [](bool condition) noexcept { assert(condition); }, encoding); +} + +inline Optional f$mb_stripos(const string& haystack, const string& needle, int64_t offset = 0, + const string& encoding = StringLibConstants::get().CP1251_STR) noexcept { + return mb_stripos_impl(haystack, needle, [](bool condition) noexcept { assert(condition); }, offset, encoding); +} diff --git a/runtime/string_functions.cpp b/runtime/string_functions.cpp index 62faaad12b..28f7c378dc 100644 --- a/runtime/string_functions.cpp +++ b/runtime/string_functions.cpp @@ -34,11 +34,7 @@ Optional f$setlocale(int64_t category, const string& locale) noexcept { } string f$prepare_search_query(const string& query) noexcept { - const char* s = clean_str(query.c_str()); - if (s == nullptr) { - s = ""; - } - return string(s); + return prepare_search_query_impl_::prepare_search_query(query, [](bool condition) noexcept { assert(condition); }); } // Based on `getcsv` from `streams` diff --git a/tests/python/tests/prepare_search_query/__init__.py b/tests/python/tests/prepare_search_query/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/python/tests/prepare_search_query/data/example1 b/tests/python/tests/prepare_search_query/data/example1 new file mode 100644 index 0000000000..c59e725370 --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example1 @@ -0,0 +1,17 @@ + abacaba dAbAcAbA АбАсАБа йфяывчАСПМИРТОЬЛЩЗБДЮЭ. + К4ЙГЩ ЩГ рщг №кКЙ РШ зй021к 01293г0129 г + + ++_+ +_ +_ +__ ++_ Щ+!"_ №+!_" №+!"_ №+_ "Щ+_ "Щ + + йк й3 к2 + + + +7 88 76кн 68е79 н8г9 ншп + + test test + + test test +test +test +test test TeSt tEsT diff --git a/tests/python/tests/prepare_search_query/data/example10 b/tests/python/tests/prepare_search_query/data/example10 new file mode 100644 index 0000000000..a964b269f1 Binary files /dev/null and b/tests/python/tests/prepare_search_query/data/example10 differ diff --git a/tests/python/tests/prepare_search_query/data/example10_prepared b/tests/python/tests/prepare_search_query/data/example10_prepared new file mode 100644 index 0000000000..7311741531 --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example10_prepared @@ -0,0 +1 @@ +0wp+0ۏń䢎+2mj媸+4ȁ뮨戯+7+9ߎ+a+aնݑŭ+a꾙n+a􇁃6v+b+bwtr+c赢θb+d+dj+e+eʔdj􅊵mb+fǽkʀ+g+hq+hyb酢+i+ifԗ0ꋷ+isɞe+iւz+iפá+jn+kf+ne󷆤p7㮐ӷae+n綍ǥwl+o+oye+p+rji+rs+s+sd+sr+t+u+w+x挨sq+y+yrsao+yy+zl值񦍛+ņ+ȯ+ɇ+ɋf2ʄa+ρ+ϟα+є+ӈoį+ӡ᫉v+աx+׭vd+ل蒍+ٮ駗+ޛ+ᤁ+ᵯǘ+嫢򙫓+漞ӟ+꽂+뙚դb򺝆o++d++𰛤է+񱷝5+󿀆𥜧+ diff --git a/tests/python/tests/prepare_search_query/data/example1_prepared b/tests/python/tests/prepare_search_query/data/example1_prepared new file mode 100644 index 0000000000..1dc52ae97e --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example1_prepared @@ -0,0 +1 @@ +01293г0129+68е79+7+76кн+88+abacaba+dabacaba+test+абасаба+г+зй021к+й3+йк+йфяывчаспмиртоьлщзбдюэ+к2+к4йгщ+ккй+н8г9+ншп+рш+рщг+щ+щг+ diff --git a/tests/python/tests/prepare_search_query/data/example2 b/tests/python/tests/prepare_search_query/data/example2 new file mode 100644 index 0000000000..9bda8c35c2 --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example2 @@ -0,0 +1 @@ +Y diff --git a/tests/python/tests/prepare_search_query/data/example2_prepared b/tests/python/tests/prepare_search_query/data/example2_prepared new file mode 100644 index 0000000000..469527404f --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example2_prepared @@ -0,0 +1 @@ +y+ diff --git a/tests/python/tests/prepare_search_query/data/example3 b/tests/python/tests/prepare_search_query/data/example3 new file mode 100644 index 0000000000..6178079822 --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example3 @@ -0,0 +1 @@ +b diff --git a/tests/python/tests/prepare_search_query/data/example3_prepared b/tests/python/tests/prepare_search_query/data/example3_prepared new file mode 100644 index 0000000000..071dc66971 --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example3_prepared @@ -0,0 +1 @@ +b+ diff --git a/tests/python/tests/prepare_search_query/data/example4 b/tests/python/tests/prepare_search_query/data/example4 new file mode 100644 index 0000000000..36774f9fe7 --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example4 @@ -0,0 +1 @@ +⚞žPuRZC[ diff --git a/tests/python/tests/prepare_search_query/data/example4_prepared b/tests/python/tests/prepare_search_query/data/example4_prepared new file mode 100644 index 0000000000..f31ecc781f --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example4_prepared @@ -0,0 +1 @@ +urzc+žp+ diff --git a/tests/python/tests/prepare_search_query/data/example5 b/tests/python/tests/prepare_search_query/data/example5 new file mode 100644 index 0000000000..8dd45ae465 Binary files /dev/null and b/tests/python/tests/prepare_search_query/data/example5 differ diff --git a/tests/python/tests/prepare_search_query/data/example5_prepared b/tests/python/tests/prepare_search_query/data/example5_prepared new file mode 100644 index 0000000000..2daa175e5d --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example5_prepared @@ -0,0 +1 @@ +8յ+c+і􃿊aen7+ᖦ+ diff --git a/tests/python/tests/prepare_search_query/data/example6 b/tests/python/tests/prepare_search_query/data/example6 new file mode 100644 index 0000000000..95cc2606dc --- /dev/null +++ b/tests/python/tests/prepare_search_query/data/example6 @@ -0,0 +1 @@ +׬qԻė^#xܵ칈T8y+䣳 V,ڦAڍ< $res); + echo json_encode($resp); +} + +main(); diff --git a/tests/python/tests/prepare_search_query/test_prepare_search_query.py b/tests/python/tests/prepare_search_query/test_prepare_search_query.py new file mode 100644 index 0000000000..8b3dd04242 --- /dev/null +++ b/tests/python/tests/prepare_search_query/test_prepare_search_query.py @@ -0,0 +1,25 @@ +import json +import os +from python.lib.testcase import WebServerAutoTestCase + +directory_path = os.path.join(os.path.dirname(__file__), "data") +prepared_suffix = "_prepared" + + +class TestPrepareSearchQuery(WebServerAutoTestCase): + def test_prepare_search_query(self): + for file in os.listdir(directory_path): + if not os.path.basename(file).endswith(prepared_suffix): + with open(os.path.join(directory_path, file), "r") as query_file: + with open(os.path.join(directory_path, file + prepared_suffix), "r") as prepared_query_file: + query = query_file.read() + expected_prepared_query = prepared_query_file.read() + if len(expected_prepared_query) > 0 and expected_prepared_query[-1] == '\n': + expected_prepared_query = expected_prepared_query[:-1] + + headers = {"Content-Type": "text/plain; charset=utf-8"} + resp = self.web_server.http_post(headers=headers, data=query.encode("utf-8")) + + self.assertEqual(resp.status_code, 200) + result = json.loads(resp.text)["POST_BODY"] + self.assertEqual(result, expected_prepared_query)