From 6d5fd395b6202e6fe8440e5f9191f73ffe7e1eaf Mon Sep 17 00:00:00 2001 From: Nikita Goryanoy Date: Thu, 9 Oct 2025 12:34:32 +0300 Subject: [PATCH 01/26] [k2] add support multipart/form-data to HTTP server --- runtime-light/server/http/http-server-state.h | 1 + runtime-light/server/http/init-functions.cpp | 23 +- runtime-light/server/http/multipart.cpp | 319 ++++++++++++++++++ runtime-light/server/http/multipart.h | 15 + runtime-light/server/server.cmake | 1 + 5 files changed, 356 insertions(+), 3 deletions(-) create mode 100644 runtime-light/server/http/multipart.cpp create mode 100644 runtime-light/server/http/multipart.h diff --git a/runtime-light/server/http/http-server-state.h b/runtime-light/server/http/http-server-state.h index 261a35dce4..39d9e4d556 100644 --- a/runtime-light/server/http/http-server-state.h +++ b/runtime-light/server/http/http-server-state.h @@ -46,6 +46,7 @@ inline constexpr std::string_view CONTENT_LENGTH = "content-length"; inline constexpr std::string_view AUTHORIZATION = "authorization"; inline constexpr std::string_view ACCEPT_ENCODING = "accept-encoding"; inline constexpr std::string_view CONTENT_ENCODING = "content-encoding"; +inline constexpr std::string_view CONTENT_DISPOSITION = "content-disposition"; } // namespace headers diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index eda7340671..274d56df9c 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -26,12 +26,14 @@ #include "runtime-light/core/globals/php-script-globals.h" #include "runtime-light/k2-platform/k2-api.h" #include "runtime-light/server/http/http-server-state.h" +#include "runtime-light/server/http/multipart.h" #include "runtime-light/state/instance-state.h" #include "runtime-light/stdlib/component/component-api.h" #include "runtime-light/stdlib/diagnostics/logs.h" #include "runtime-light/stdlib/output/output-state.h" #include "runtime-light/stdlib/server/http-functions.h" #include "runtime-light/stdlib/zlib/zlib-functions.h" +#include "runtime-light/stdlib/file/file-system-functions.h" #include "runtime-light/streams/stream.h" #include "runtime-light/tl/tl-core.h" #include "runtime-light/tl/tl-functions.h" @@ -320,14 +322,16 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector(invoke_http.body.data()), static_cast(invoke_http.body.size())}; if (!std::ranges::search(content_type, CONTENT_TYPE_APP_FORM_URLENCODED).empty()) { - string body{reinterpret_cast(invoke_http.body.data()), static_cast(invoke_http.body.size())}; f$parse_str(body, superglobals.v$_POST); http_server_instance_st.opt_raw_post_data.emplace(std::move(body)); } else if (!std::ranges::search(content_type, CONTENT_TYPE_MULTIPART_FORM_DATA).empty()) { - kphp::log::error("unsupported content-type: {}", CONTENT_TYPE_MULTIPART_FORM_DATA); + std::optional boundary{parse_boundary(content_type)}; + if (!boundary) { + kphp::http::parse_multipart({body.c_str(), body.size()}, boundary.value(), superglobals.v$_POST, superglobals.v$_FILES); + } } else { - string body{reinterpret_cast(invoke_http.body.data()), static_cast(invoke_http.body.size())}; http_server_instance_st.opt_raw_post_data.emplace(std::move(body)); } @@ -379,6 +383,7 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector finalize_server() noexcept { auto& http_server_instance_st{HttpServerInstanceState::get()}; + auto& superglobals{InstanceState::get().php_script_mutable_globals_singleton.get_superglobals()}; string response_body{}; tl::HttpResponse http_response{}; @@ -433,6 +438,18 @@ kphp::coro::task<> finalize_server() noexcept { [[fallthrough]]; } case kphp::http::response_state::completed: + const array files = superglobals.v$_FILES.to_array(); + for (array::const_iterator it = files.begin(); it != files.end(); ++it) { + const mixed& file = it.get_value(); + + if (!file.is_array()) { + kphp::log::error("$_FILES contains a value that is not an array"); + continue; + } + + const mixed tmp_filename = file.get_value(string("tmp_name")); + f$unlink(tmp_filename.to_string()); + } co_return; } } diff --git a/runtime-light/server/http/multipart.cpp b/runtime-light/server/http/multipart.cpp new file mode 100644 index 0000000000..e763ef3dd0 --- /dev/null +++ b/runtime-light/server/http/multipart.cpp @@ -0,0 +1,319 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2024 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#include "runtime-light/server/http/multipart.h" +#include "runtime-light/stdlib/file/resource.h" +#include "runtime-light/stdlib/diagnostics/logs.h" +#include "runtime-light/server/http/http-server-state.h" +#include "runtime-light/stdlib/math/random-functions.h" + +#include +#include + +#include "runtime-common/core/runtime-core.h" +#include "common/algorithms/string-algorithms.h" + +namespace { + +constexpr int8_t TMP_FILENAME_LENGTH = 10; +constexpr std::string_view TMP_DIR = "/tmp/"; + +constexpr std::string_view HEADER_CONTENT_DISPOSITION_FORM_DATA = "form-data;"; +constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary="; + +struct header { + header() = delete; + + private: + header(std::string_view name_view, std::string_view value_view) : name{name_view}, value{value_view} {} + + public: + std::string_view name; + std::string_view value; + + static std::optional
create(std::string_view header_string) noexcept { + auto [name_view, value_view]{vk::split_string_view(header_string, ':')}; + if (name_view.size() + value_view.size() + 1 != header_string.size()) [[unlikely]] { + return std::nullopt; + } + if (name_view.empty() || value_view.empty()) { + return std::nullopt; + } + return header(name_view, value_view.substr(1)); + } + + bool name_is(const std::string_view s) { + const auto lower_name{name | std::views::take(s.size()) | + std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; + return std::ranges::equal(lower_name, s); + } +}; + +// Represents one attribute from Content-Disposition header. +// For example, a typically file field will have two attributes: +// 1) attr = "name", value = "avatar" +// 2) attr = "filename", value = "my_avatar.png" +struct partAttr { + std::string_view attr; + std::string_view value; + + partAttr() = delete; + partAttr(const std::string_view attr_, const std::string_view value_) : attr{attr_}, value{value_} {}; +}; + +// Represents one part of multipart content +struct part { + std::string_view name; + std::string_view filename; + std::string_view content_type; + std::string_view data; +}; + +class MultipartBody { + private: + + std::string_view body; + std::string_view boundary; + size_t pos; + + std::optional next_part(); + void addPost(const part &part, mixed &v$_POST); + void addFile(const part &part, mixed &v$_FILES); + + std::optional
next_header(); + std::string_view parse_data(); + + // Returns true if current pos refers to one of \r or \n + bool is_crlf() { + return body[pos] == '\r' || body[pos] == '\n'; + } + + void skip_crlf() { + if (body[pos] == '\r') { + pos++; + } + if (body[pos] == '\n') { + pos++; + } + } + + void skip_boundary() { + if (pos == 0) { + pos += 2; + } + pos += boundary.size(); + if (body[pos] == '-' && body[pos+1] == '-') { + pos += 2; + } + } + + public: + + MultipartBody(const std::string_view body_, const std::string_view boundary_) + : body{body_}, boundary{boundary_}, pos{0} {} + + void parse_into(mixed &v$_POST, mixed &v$_FILES); +}; + +std::optional MultipartBody::next_part() { + part part; + + if (pos == 0) { + skip_boundary(); + skip_crlf(); + } + + do { + std::optional
maybe_header{next_header()}; + if (!maybe_header) { + return std::nullopt; + } + header header{maybe_header.value()}; + + if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) { + if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) { + return std::nullopt; + } + auto attrs = std::views::split(header.value, ";") | std::views::transform([](auto part) { + std::string_view part_view{vk::trim(std::string_view(part))}; + auto [name_view, value_view]{vk::split_string_view(part_view, '=')}; + if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) { + value_view = value_view.substr(1, value_view.size()-2); + } + return partAttr{name_view, value_view}; + }); + + for (partAttr a : attrs) { + if (a.attr.empty()) { + return std::nullopt; + } + if (a.attr == "name") { + part.name = a.value; + } else if (a.attr == "filename") { + part.filename = a.value; + } + } + } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { + part.content_type = header.value; + } + } while (!is_crlf()); + + skip_crlf(); + part.data = parse_data(); + skip_boundary(); + skip_crlf(); + return part; +} + +std::optional
MultipartBody::next_header() { + size_t lf{body.find('\n', pos)}; + size_t header_end{lf-1}; + + if (lf == std::string_view::npos) { + return std::nullopt; + } + + if (body[header_end] == '\r') { + header_end--; + } + + auto res{header::create(body.substr(pos, header_end-pos+1))}; + pos = lf + 1; + return res; +} + +std::string_view MultipartBody::parse_data() { + size_t data_start{pos}; + size_t data_end{body.find(boundary, data_start)}; + pos = data_end; + + if (pos == std::string_view::npos) { + return {}; + } + + if (body[data_end-1] != '-' || body[data_end-2] != '-') { + return {}; + } + data_end -= 2; + if (body[data_end] == '\n') { + data_end--; + } + if (body[data_end] == '\r') { + data_end--; + } + + if (data_end > data_start) { + return body.substr(data_start, data_end-data_start-1); + } + + return {}; + +} + +void MultipartBody::parse_into(mixed &v$_POST, mixed &v$_FILES) { + std::optional maybe_part; + while ((maybe_part = next_part())) { + part p{maybe_part.value()}; + if (p.name.empty()) { + return; + } + if (!p.filename.empty()) { + addFile(p, v$_FILES); + } else { + addPost(p, v$_POST); + } + } +} + +void MultipartBody::addPost(const part &part, mixed &v$_POST) { + string name{part.name.data(), static_cast(part.name.size())}; + v$_POST.set_value(name, string(part.data.data(), part.data.size())); +} + +void MultipartBody::addFile(const part &part, mixed &v$_FILES) { + //TODO: replace f$random_bytes to avoid string allocation + Optional rand_str{f$random_bytes(TMP_FILENAME_LENGTH)}; + + if (!rand_str.has_value()) { + kphp::log::warning("error generating random_bytes for tmp file"); + return; + } + + string tmp_name_str{TMP_DIR.data(), TMP_DIR.size()}; + tmp_name_str.append(rand_str.val()); + std::string_view tmp_name{tmp_name_str.c_str(), tmp_name_str.size()}; + + auto file{kphp::fs::file::open(tmp_name, "w")}; + if (!file) [[unlikely]] { + kphp::log::warning("error opening tmp file {}: error code -> {}", tmp_name, file.error()); + return; + } + + int file_size{0}; + auto file_size_result = (*file).write({reinterpret_cast(part.data.data()), part.data.size()}); + if (file_size_result.has_value()) { + file_size = file_size_result.value(); + if (file_size < part.data.size()) { + kphp::log::warning("error write to tmp file: wrote {} bytes insted of {}", file_size, part.data.size()); + return; + } + } else { + kphp::log::warning("error write to tmp file: errcode {}", file_size_result.error()); + } + + string name{part.name.data(), static_cast(part.name.size())}; + + if (part.name.ends_with("[]")) { + mixed& file = v$_FILES[name.substr(0, name.size() - 2)]; + if (file_size == part.data.size()) { + file[string("name")].push_back(string(part.filename.data(), part.filename.size())); + file[string("type")].push_back(string(part.content_type.data(), part.content_type.size())); + file[string("size")].push_back(file_size); + file[string("tmp_name")].push_back(string(tmp_name.data(), tmp_name.size())); + file[string("error")].push_back(0); + } else { + file[string("name")].push_back(string()); + file[string("type")].push_back(string()); + file[string("size")].push_back(0); + file[string("tmp_name")].push_back(string()); + file[string("error")].push_back(-file_size); + } + } else { + mixed& file = v$_FILES[name]; + if (file_size == part.data.size()) { + file.set_value(string("name"), string(part.filename.data(), part.filename.size())); + file.set_value(string("type"), string(part.content_type.data(), part.content_type.size())); + file.set_value(string("size"), file_size); + file.set_value(string("tmp_name"), string(tmp_name.data(), tmp_name.size())); + file.set_value(string("error"), 0); + } else { + file.set_value(string("size"), 0); + file.set_value(string("tmp_name"), string()); + file.set_value(string("error"), -file_size); + } + } +} + +} // namespace + +namespace kphp::http { + +void parse_multipart(const std::string_view body, const std::string_view boundary, mixed &v$_POST, mixed &v$_FILES) { + MultipartBody mb{body, boundary}; + mb.parse_into(v$_POST, v$_FILES); +} + +std::optional parse_boundary(const std::string_view content_type) { + size_t pos{content_type.find(MULTIPART_BOUNDARY_EQ)}; + if (pos == std::string_view::npos) { + return std::nullopt; + } + std::string_view res{content_type.substr(pos + MULTIPART_BOUNDARY_EQ.size())}; + if (res.size() >= 2 && res.starts_with('"') && res.ends_with('"')) { + res = res.substr(1, res.size()-2); + } + return res; +} + +} // namespace kphp::http diff --git a/runtime-light/server/http/multipart.h b/runtime-light/server/http/multipart.h new file mode 100644 index 0000000000..d01e5a88e1 --- /dev/null +++ b/runtime-light/server/http/multipart.h @@ -0,0 +1,15 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2024 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#include + +#include "runtime-common/core/runtime-core.h" + +namespace kphp::http { + +void parse_multipart(const std::string_view body, const std::string_view boundary, mixed &v$_POST, mixed &v$_FILES); + +std::optional parse_boundary(const std::string_view content_type); + +} // namespace kphp::http diff --git a/runtime-light/server/server.cmake b/runtime-light/server/server.cmake index d61452945a..5b37757b34 100644 --- a/runtime-light/server/server.cmake +++ b/runtime-light/server/server.cmake @@ -3,6 +3,7 @@ prepend( server/ cli/cli-instance-state.cpp http/init-functions.cpp + http/multipart.cpp http/http-server-state.cpp job-worker/job-worker-server-state.cpp rpc/init-functions.cpp From 260040ef319dc3ce3adafd96a2cbbb5b26b57019 Mon Sep 17 00:00:00 2001 From: Nikita Goryanoy Date: Sun, 1 Mar 2026 12:02:08 +0000 Subject: [PATCH 02/26] [k2] refactor multipart/form-data via ranges --- runtime-light/server/http/multipart.cpp | 206 ++++++++++-------------- 1 file changed, 86 insertions(+), 120 deletions(-) diff --git a/runtime-light/server/http/multipart.cpp b/runtime-light/server/http/multipart.cpp index e763ef3dd0..036b84824f 100644 --- a/runtime-light/server/http/multipart.cpp +++ b/runtime-light/server/http/multipart.cpp @@ -43,7 +43,7 @@ struct header { return header(name_view, value_view.substr(1)); } - bool name_is(const std::string_view s) { + bool name_is(std::string_view s) { const auto lower_name{name | std::views::take(s.size()) | std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; return std::ranges::equal(lower_name, s); @@ -55,11 +55,11 @@ struct header { // 1) attr = "name", value = "avatar" // 2) attr = "filename", value = "my_avatar.png" struct partAttr { - std::string_view attr; - std::string_view value; + const std::string_view attr; + const std::string_view value; partAttr() = delete; - partAttr(const std::string_view attr_, const std::string_view value_) : attr{attr_}, value{value_} {}; + partAttr(std::string_view attr_, std::string_view value_) : attr{attr_}, value{value_} {}; }; // Represents one part of multipart content @@ -72,148 +72,114 @@ struct part { class MultipartBody { private: - std::string_view body; std::string_view boundary; - size_t pos; - std::optional next_part(); void addPost(const part &part, mixed &v$_POST); void addFile(const part &part, mixed &v$_FILES); - - std::optional
next_header(); - std::string_view parse_data(); - - // Returns true if current pos refers to one of \r or \n - bool is_crlf() { - return body[pos] == '\r' || body[pos] == '\n'; - } - - void skip_crlf() { - if (body[pos] == '\r') { - pos++; - } - if (body[pos] == '\n') { - pos++; - } - } - - void skip_boundary() { - if (pos == 0) { - pos += 2; - } - pos += boundary.size(); - if (body[pos] == '-' && body[pos+1] == '-') { - pos += 2; - } - } public: - - MultipartBody(const std::string_view body_, const std::string_view boundary_) - : body{body_}, boundary{boundary_}, pos{0} {} + MultipartBody(std::string_view body_, std::string_view boundary_) + : body{body_}, boundary{boundary_} {} void parse_into(mixed &v$_POST, mixed &v$_FILES); }; -std::optional MultipartBody::next_part() { - part part; - - if (pos == 0) { - skip_boundary(); - skip_crlf(); +std::string_view trim_crlf(std::string_view s) { + if (s.starts_with('\r')) { + s = s.substr(1); } - - do { - std::optional
maybe_header{next_header()}; - if (!maybe_header) { - return std::nullopt; - } - header header{maybe_header.value()}; - - if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) { - if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) { - return std::nullopt; - } - auto attrs = std::views::split(header.value, ";") | std::views::transform([](auto part) { - std::string_view part_view{vk::trim(std::string_view(part))}; - auto [name_view, value_view]{vk::split_string_view(part_view, '=')}; - if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) { - value_view = value_view.substr(1, value_view.size()-2); - } - return partAttr{name_view, value_view}; - }); - - for (partAttr a : attrs) { - if (a.attr.empty()) { - return std::nullopt; - } - if (a.attr == "name") { - part.name = a.value; - } else if (a.attr == "filename") { - part.filename = a.value; - } - } - } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { - part.content_type = header.value; - } - } while (!is_crlf()); - - skip_crlf(); - part.data = parse_data(); - skip_boundary(); - skip_crlf(); - return part; + if (s.starts_with('\n')) { + s = s.substr(1); + } + if (s.ends_with('\n')) { + s = s.substr(0, s.size()-1); + } + if (s.ends_with('\r')) { + s = s.substr(0, s.size()-1); + } + return s; } -std::optional
MultipartBody::next_header() { - size_t lf{body.find('\n', pos)}; +std::pair, std::string_view> parse_next_header(std::string_view s) { + size_t lf{s.find('\n')}; size_t header_end{lf-1}; if (lf == std::string_view::npos) { - return std::nullopt; + return {std::nullopt, s}; } - if (body[header_end] == '\r') { + if (s[header_end] == '\r') { header_end--; } - auto res{header::create(body.substr(pos, header_end-pos+1))}; - pos = lf + 1; - return res; + auto hdr{header::create(s.substr(0, header_end+1))}; + s = s.substr(lf+1); + return std::pair, std::string_view>(hdr, s); } -std::string_view MultipartBody::parse_data() { - size_t data_start{pos}; - size_t data_end{body.find(boundary, data_start)}; - pos = data_end; - - if (pos == std::string_view::npos) { - return {}; - } - - if (body[data_end-1] != '-' || body[data_end-2] != '-') { - return {}; - } - data_end -= 2; - if (body[data_end] == '\n') { - data_end--; - } - if (body[data_end] == '\r') { - data_end--; - } +auto parse_attrs(std::string_view header_value) { + return std::views::split(header_value, ";") | std::views::transform([](auto part) { + std::string_view part_view{vk::trim(std::string_view(part))}; + auto [name_view, value_view]{vk::split_string_view(part_view, '=')}; + if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) { + value_view = value_view.substr(1, value_view.size()-2); + } + return partAttr{name_view, value_view}; + }); +} - if (data_end > data_start) { - return body.substr(data_start, data_end-data_start-1); +void MultipartBody::parse_into(mixed &v$_POST, mixed &v$_FILES) { + body = trim_crlf(body); + if (!body.ends_with("--")) { + return; } - return {}; - -} + auto parts = std::views::split(body, boundary) | std::views::transform([](auto raw_part) noexcept -> std::optional { + std::string_view str{raw_part}; + if (!str.ends_with("--")) { + return std::nullopt; + } + // cut "--" before next boundary + str = str.substr(0, str.size() - 2); + // trim \r\n + str = trim_crlf(str); + + part part; + while (true) { + auto [maybe_header, new_str] = parse_next_header(str); + if (!maybe_header) { + break; + } + str = new_str; + header header{maybe_header.value()}; + if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) { + if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) { + return std::nullopt; + } + auto attrs = parse_attrs(header.value); + for (partAttr a : attrs) { + if (a.attr.empty()) { + return std::nullopt; + } + if (a.attr == "name") { + part.name = a.value; + } else if (a.attr == "filename") { + part.filename = a.value; + } + } + } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { + part.content_type = header.value; + } + } + part.data = str; + return part; + }); -void MultipartBody::parse_into(mixed &v$_POST, mixed &v$_FILES) { - std::optional maybe_part; - while ((maybe_part = next_part())) { + for (std::optional maybe_part : parts) { + if (!maybe_part) { + continue; + } part p{maybe_part.value()}; if (p.name.empty()) { return; @@ -299,12 +265,12 @@ void MultipartBody::addFile(const part &part, mixed &v$_FILES) { namespace kphp::http { -void parse_multipart(const std::string_view body, const std::string_view boundary, mixed &v$_POST, mixed &v$_FILES) { +void parse_multipart(std::string_view body, std::string_view boundary, mixed &v$_POST, mixed &v$_FILES) { MultipartBody mb{body, boundary}; mb.parse_into(v$_POST, v$_FILES); } -std::optional parse_boundary(const std::string_view content_type) { +std::optional parse_boundary(std::string_view content_type) { size_t pos{content_type.find(MULTIPART_BOUNDARY_EQ)}; if (pos == std::string_view::npos) { return std::nullopt; From 7df626cae3a46e69cdabf8a872c83ac96c69de3f Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Wed, 4 Mar 2026 15:53:43 +0300 Subject: [PATCH 03/26] wip --- .idea/misc.xml | 5 +- runtime-light/server/http/init-functions.cpp | 6 +- runtime-light/server/http/multipart.cpp | 226 +++++++++---------- runtime-light/server/http/multipart.h | 11 +- 4 files changed, 125 insertions(+), 123 deletions(-) diff --git a/.idea/misc.xml b/.idea/misc.xml index 5ff349e446..307dd609cd 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,5 +1,8 @@ + + @@ -28,4 +31,4 @@ - + \ No newline at end of file diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 274d56df9c..385d844ad0 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -327,9 +327,9 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector boundary{parse_boundary(content_type)}; - if (!boundary) { - kphp::http::parse_multipart({body.c_str(), body.size()}, boundary.value(), superglobals.v$_POST, superglobals.v$_FILES); + auto boundary_opt{kphp::http::multipart::extract_boundary(content_type)}; + if (boundary_opt.has_value()) { + kphp::http::multipart::parse_multipart({body.c_str(), body.size()}, *boundary_opt, superglobals.v$_POST, superglobals.v$_FILES); } } else { http_server_instance_st.opt_raw_post_data.emplace(std::move(body)); diff --git a/runtime-light/server/http/multipart.cpp b/runtime-light/server/http/multipart.cpp index 036b84824f..74010059bb 100644 --- a/runtime-light/server/http/multipart.cpp +++ b/runtime-light/server/http/multipart.cpp @@ -2,17 +2,20 @@ // Copyright (c) 2024 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt -#include "runtime-light/server/http/multipart.h" -#include "runtime-light/stdlib/file/resource.h" -#include "runtime-light/stdlib/diagnostics/logs.h" -#include "runtime-light/server/http/http-server-state.h" -#include "runtime-light/stdlib/math/random-functions.h" - -#include +#include #include +#include +#include +#include +#include -#include "runtime-common/core/runtime-core.h" #include "common/algorithms/string-algorithms.h" +#include "runtime-common/core/runtime-core.h" +#include "runtime-light/server/http/http-server-state.h" +#include "runtime-light/server/http/multipart.h" +#include "runtime-light/stdlib/diagnostics/logs.h" +#include "runtime-light/stdlib/file/resource.h" +#include "runtime-light/stdlib/math/random-functions.h" namespace { @@ -23,43 +26,35 @@ constexpr std::string_view HEADER_CONTENT_DISPOSITION_FORM_DATA = "form-data;"; constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary="; struct header { - header() = delete; - - private: - header(std::string_view name_view, std::string_view value_view) : name{name_view}, value{value_view} {} - - public: - std::string_view name; - std::string_view value; + const std::string_view name; + const std::string_view value; static std::optional
create(std::string_view header_string) noexcept { auto [name_view, value_view]{vk::split_string_view(header_string, ':')}; - if (name_view.size() + value_view.size() + 1 != header_string.size()) [[unlikely]] { - return std::nullopt; - } if (name_view.empty() || value_view.empty()) { return std::nullopt; } - return header(name_view, value_view.substr(1)); + return header{.name = name_view, .value = value_view.substr(1)}; } - bool name_is(std::string_view s) { - const auto lower_name{name | std::views::take(s.size()) | - std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; - return std::ranges::equal(lower_name, s); + bool name_is(std::string_view header_name) noexcept { + const auto lower_name{name | std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; + const auto lower_header_name{header_name | std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; + return std::ranges::equal(lower_name, lower_header_name); } }; -// Represents one attribute from Content-Disposition header. -// For example, a typically file field will have two attributes: +// Represents one attribute from Content-Disposition header. +// For example, a typically file field will have two attributes: // 1) attr = "name", value = "avatar" // 2) attr = "filename", value = "my_avatar.png" -struct partAttr { +struct part_attribute { const std::string_view attr; const std::string_view value; - partAttr() = delete; - partAttr(std::string_view attr_, std::string_view value_) : attr{attr_}, value{value_} {}; + part_attribute(std::string_view attr_, std::string_view value_) noexcept + : attr{attr_}, + value{value_} {}; }; // Represents one part of multipart content @@ -71,18 +66,18 @@ struct part { }; class MultipartBody { - private: - std::string_view body; - std::string_view boundary; - - void addPost(const part &part, mixed &v$_POST); - void addFile(const part &part, mixed &v$_FILES); - - public: - MultipartBody(std::string_view body_, std::string_view boundary_) - : body{body_}, boundary{boundary_} {} - - void parse_into(mixed &v$_POST, mixed &v$_FILES); + std::string_view body; + std::string_view boundary; + + void addPost(const part& part, mixed& v$_POST); + void addFile(const part& part, mixed& v$_FILES); + +public: + MultipartBody(std::string_view body_, std::string_view boundary_) + : body{body_}, + boundary{boundary_} {} + + void parse_into(mixed& v$_POST, mixed& v$_FILES); }; std::string_view trim_crlf(std::string_view s) { @@ -93,97 +88,99 @@ std::string_view trim_crlf(std::string_view s) { s = s.substr(1); } if (s.ends_with('\n')) { - s = s.substr(0, s.size()-1); + s = s.substr(0, s.size() - 1); } if (s.ends_with('\r')) { - s = s.substr(0, s.size()-1); + s = s.substr(0, s.size() - 1); } return s; } std::pair, std::string_view> parse_next_header(std::string_view s) { size_t lf{s.find('\n')}; - size_t header_end{lf-1}; - + size_t header_end{lf - 1}; + if (lf == std::string_view::npos) { - return {std::nullopt, s}; + return {std::nullopt, s}; } - + if (s[header_end] == '\r') { header_end--; } - auto hdr{header::create(s.substr(0, header_end+1))}; - s = s.substr(lf+1); + auto hdr{header::create(s.substr(0, header_end + 1))}; + s = s.substr(lf + 1); return std::pair, std::string_view>(hdr, s); } auto parse_attrs(std::string_view header_value) { return std::views::split(header_value, ";") | std::views::transform([](auto part) { - std::string_view part_view{vk::trim(std::string_view(part))}; - auto [name_view, value_view]{vk::split_string_view(part_view, '=')}; - if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) { - value_view = value_view.substr(1, value_view.size()-2); - } - return partAttr{name_view, value_view}; - }); + std::string_view part_view{vk::trim(std::string_view(part))}; + auto [name_view, value_view]{vk::split_string_view(part_view, '=')}; + if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) { + value_view = value_view.substr(1, value_view.size() - 2); + } + return part_attribute{name_view, value_view}; + }); } -void MultipartBody::parse_into(mixed &v$_POST, mixed &v$_FILES) { - body = trim_crlf(body); - if (!body.ends_with("--")) { - return; - } - +void MultipartBody::parse_into(mixed& v$_POST, mixed& v$_FILES) { + kphp::log::info("body {}", body); + kphp::log::info("boundary {}", boundary); auto parts = std::views::split(body, boundary) | std::views::transform([](auto raw_part) noexcept -> std::optional { - std::string_view str{raw_part}; - if (!str.ends_with("--")) { - return std::nullopt; - } - // cut "--" before next boundary - str = str.substr(0, str.size() - 2); - // trim \r\n - str = trim_crlf(str); - - part part; - while (true) { - auto [maybe_header, new_str] = parse_next_header(str); - if (!maybe_header) { - break; - } - str = new_str; - header header{maybe_header.value()}; - if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) { - if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) { - return std::nullopt; - } - auto attrs = parse_attrs(header.value); - for (partAttr a : attrs) { - if (a.attr.empty()) { - return std::nullopt; - } - if (a.attr == "name") { - part.name = a.value; - } else if (a.attr == "filename") { - part.filename = a.value; - } - } - } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { - part.content_type = header.value; - } - } - part.data = str; - return part; - }); + kphp::log::info("part of body {}", raw_part); + std::string_view str{raw_part}; + if (!str.ends_with("--")) { + return std::nullopt; + } + // cut "--" before next boundary + str = str.substr(0, str.size() - 2); + // trim \r\n + str = trim_crlf(str); + + part part; + while (true) { + auto [maybe_header, new_str] = parse_next_header(str); + if (!maybe_header) { + break; + } + str = new_str; + header header{maybe_header.value()}; + kphp::log::info("header name {}", header.name); + if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) { + if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) { + return std::nullopt; + } + auto attrs = parse_attrs(header.value); + for (part_attribute a : attrs) { + if (a.attr.empty()) { + return std::nullopt; + } + if (a.attr == "name") { + part.name = a.value; + } else if (a.attr == "filename") { + part.filename = a.value; + } + } + } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { + part.content_type = header.value; + } + } + part.data = str; + return part; + }); for (std::optional maybe_part : parts) { + kphp::log::info("process multipart part"); if (!maybe_part) { continue; } + kphp::log::info("process multipart non-null part"); part p{maybe_part.value()}; if (p.name.empty()) { - return; + continue; } + kphp::log::info("process multipart non-empty part"); if (!p.filename.empty()) { addFile(p, v$_FILES); } else { @@ -192,15 +189,15 @@ void MultipartBody::parse_into(mixed &v$_POST, mixed &v$_FILES) { } } -void MultipartBody::addPost(const part &part, mixed &v$_POST) { +void MultipartBody::addPost(const part& part, mixed& v$_POST) { string name{part.name.data(), static_cast(part.name.size())}; v$_POST.set_value(name, string(part.data.data(), part.data.size())); } -void MultipartBody::addFile(const part &part, mixed &v$_FILES) { - //TODO: replace f$random_bytes to avoid string allocation +void MultipartBody::addFile(const part& part, mixed& v$_FILES) { + // TODO: replace f$random_bytes to avoid string allocation Optional rand_str{f$random_bytes(TMP_FILENAME_LENGTH)}; - + if (!rand_str.has_value()) { kphp::log::warning("error generating random_bytes for tmp file"); return; @@ -209,7 +206,7 @@ void MultipartBody::addFile(const part &part, mixed &v$_FILES) { string tmp_name_str{TMP_DIR.data(), TMP_DIR.size()}; tmp_name_str.append(rand_str.val()); std::string_view tmp_name{tmp_name_str.c_str(), tmp_name_str.size()}; - + auto file{kphp::fs::file::open(tmp_name, "w")}; if (!file) [[unlikely]] { kphp::log::warning("error opening tmp file {}: error code -> {}", tmp_name, file.error()); @@ -263,23 +260,24 @@ void MultipartBody::addFile(const part &part, mixed &v$_FILES) { } // namespace -namespace kphp::http { +namespace kphp::http::multipart { -void parse_multipart(std::string_view body, std::string_view boundary, mixed &v$_POST, mixed &v$_FILES) { - MultipartBody mb{body, boundary}; - mb.parse_into(v$_POST, v$_FILES); +void parse_multipart(std::string_view body, std::string_view boundary, mixed& v$_POST, mixed& v$_FILES) { + MultipartBody mb{body, boundary}; + mb.parse_into(v$_POST, v$_FILES); } -std::optional parse_boundary(std::string_view content_type) { +std::optional extract_boundary(std::string_view content_type) noexcept { size_t pos{content_type.find(MULTIPART_BOUNDARY_EQ)}; if (pos == std::string_view::npos) { return std::nullopt; } + // todo assert "body" std::string_view res{content_type.substr(pos + MULTIPART_BOUNDARY_EQ.size())}; if (res.size() >= 2 && res.starts_with('"') && res.ends_with('"')) { - res = res.substr(1, res.size()-2); + res = res.substr(1, res.size() - 2); } return res; } -} // namespace kphp::http +} // namespace kphp::http::multipart diff --git a/runtime-light/server/http/multipart.h b/runtime-light/server/http/multipart.h index d01e5a88e1..88291febde 100644 --- a/runtime-light/server/http/multipart.h +++ b/runtime-light/server/http/multipart.h @@ -1,15 +1,16 @@ // Compiler for PHP (aka KPHP) -// Copyright (c) 2024 LLC «V Kontakte» +// Copyright (c) 2026 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt +#include #include #include "runtime-common/core/runtime-core.h" -namespace kphp::http { +namespace kphp::http::multipart { -void parse_multipart(const std::string_view body, const std::string_view boundary, mixed &v$_POST, mixed &v$_FILES); +void parse_multipart(std::string_view body, std::string_view boundary, mixed& v$_POST, mixed& v$_FILES); -std::optional parse_boundary(const std::string_view content_type); +std::optional extract_boundary(std::string_view content_type) noexcept; -} // namespace kphp::http +} // namespace kphp::http::multipart From 4e5e7f417dc98326028dd78e918ba70259083d60 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Thu, 5 Mar 2026 15:24:51 +0300 Subject: [PATCH 04/26] wip --- runtime-light/server/http/init-functions.cpp | 18 +- runtime-light/server/http/multipart.cpp | 294 +++++++++---------- runtime-light/server/http/multipart.h | 6 +- 3 files changed, 151 insertions(+), 167 deletions(-) diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 385d844ad0..477aa02006 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -30,10 +30,10 @@ #include "runtime-light/state/instance-state.h" #include "runtime-light/stdlib/component/component-api.h" #include "runtime-light/stdlib/diagnostics/logs.h" +#include "runtime-light/stdlib/file/file-system-functions.h" #include "runtime-light/stdlib/output/output-state.h" #include "runtime-light/stdlib/server/http-functions.h" #include "runtime-light/stdlib/zlib/zlib-functions.h" -#include "runtime-light/stdlib/file/file-system-functions.h" #include "runtime-light/streams/stream.h" #include "runtime-light/tl/tl-core.h" #include "runtime-light/tl/tl-functions.h" @@ -327,9 +327,9 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector finalize_server() noexcept { } // fill headers http_response.http_response.headers.value.reserve(http_server_instance_st.headers().size()); - std::transform(http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), - std::back_inserter(http_response.http_response.headers.value), [](const auto& header_entry) noexcept { - const auto& [name, value]{header_entry}; - return tl::httpHeaderEntry{ - .is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; - }); + std::transform( + http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), std::back_inserter(http_response.http_response.headers.value), + [](const auto& header_entry) noexcept { + const auto& [name, value]{header_entry}; + return tl::httpHeaderEntry{.is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; + }); http_server_instance_st.response_state = kphp::http::response_state::headers_sent; [[fallthrough]]; } diff --git a/runtime-light/server/http/multipart.cpp b/runtime-light/server/http/multipart.cpp index 74010059bb..d744e8207b 100644 --- a/runtime-light/server/http/multipart.cpp +++ b/runtime-light/server/http/multipart.cpp @@ -1,5 +1,5 @@ // Compiler for PHP (aka KPHP) -// Copyright (c) 2024 LLC «V Kontakte» +// Copyright (c) 2026 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt #include @@ -25,177 +25,149 @@ constexpr std::string_view TMP_DIR = "/tmp/"; constexpr std::string_view HEADER_CONTENT_DISPOSITION_FORM_DATA = "form-data;"; constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary="; -struct header { +std::string_view trim_crlf(std::string_view sv) { + if (sv.starts_with('\r')) { + sv.remove_prefix(1); + } + if (sv.starts_with('\n')) { + sv.remove_prefix(1); + } + + if (sv.ends_with('\n')) { + sv.remove_suffix(1); + } + if (sv.ends_with('\r')) { + sv.remove_suffix(1); + } + return sv; +} + +struct part_header { const std::string_view name; const std::string_view value; - static std::optional
create(std::string_view header_string) noexcept { - auto [name_view, value_view]{vk::split_string_view(header_string, ':')}; + static std::optional parse(std::string_view header) noexcept { + auto [name_view, value_view]{vk::split_string_view(header, ':')}; if (name_view.empty() || value_view.empty()) { return std::nullopt; } - return header{.name = name_view, .value = value_view.substr(1)}; + return part_header{name_view, value_view.substr(1)}; } - bool name_is(std::string_view header_name) noexcept { + bool name_is(std::string_view header_name) const noexcept { const auto lower_name{name | std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; const auto lower_header_name{header_name | std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; return std::ranges::equal(lower_name, lower_header_name); } + +private: + part_header(std::string_view name, std::string_view value) noexcept + : name(name), + value(value) {} }; -// Represents one attribute from Content-Disposition header. -// For example, a typically file field will have two attributes: -// 1) attr = "name", value = "avatar" -// 2) attr = "filename", value = "my_avatar.png" +auto parse_headers(std::string_view sv) noexcept { + static constexpr std::string_view DELIM = "\r\n"; + return std::views::split(sv, DELIM) | std::views::transform([](auto raw_header) { return part_header::parse(std::string_view(raw_header)); }) | + std::views::take_while([](auto header_opt) { return header_opt.has_value(); }) | std::views::transform([](auto header_opt) { return *header_opt; }); +} + struct part_attribute { - const std::string_view attr; + const std::string_view name; const std::string_view value; - part_attribute(std::string_view attr_, std::string_view value_) noexcept - : attr{attr_}, - value{value_} {}; -}; + static std::optional parse(std::string_view attribute) noexcept { + auto [name_view, value_view]{vk::split_string_view(vk::trim(attribute), '=')}; + if (name_view.empty() || value_view.empty()) { + return std::nullopt; + } + // todo assert "value" + if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) { + value_view = value_view.substr(1, value_view.size() - 2); + } + return part_attribute{name_view, value_view}; + } -// Represents one part of multipart content -struct part { - std::string_view name; - std::string_view filename; - std::string_view content_type; - std::string_view data; +private: + part_attribute(std::string_view name, std::string_view value) noexcept + : name(name), + value(value) {} }; -class MultipartBody { - std::string_view body; - std::string_view boundary; - - void addPost(const part& part, mixed& v$_POST); - void addFile(const part& part, mixed& v$_FILES); - -public: - MultipartBody(std::string_view body_, std::string_view boundary_) - : body{body_}, - boundary{boundary_} {} +auto parse_attrs(std::string_view header_value) noexcept { + static constexpr std::string_view DELIM = ";"; + return std::views::split(header_value, DELIM) | std::views::transform([](auto part) { return part_attribute::parse(std::string_view(part)); }) | + std::views::take_while([](auto attribute_opt) { return attribute_opt.has_value(); }) | + std::views::transform([](auto attribute_opt) { return *attribute_opt; }); +} - void parse_into(mixed& v$_POST, mixed& v$_FILES); -}; +struct part { + std::string_view name_attribute; + std::optional filename_attribute; + std::optional content_type; + std::string_view body; -std::string_view trim_crlf(std::string_view s) { - if (s.starts_with('\r')) { - s = s.substr(1); - } - if (s.starts_with('\n')) { - s = s.substr(1); - } - if (s.ends_with('\n')) { - s = s.substr(0, s.size() - 1); - } - if (s.ends_with('\r')) { - s = s.substr(0, s.size() - 1); - } - return s; -} + static std::optional parse(std::string_view part_view) { + static constexpr std::string_view PART_BODY_DELIM = "\r\n\r\n"; -std::pair, std::string_view> parse_next_header(std::string_view s) { - size_t lf{s.find('\n')}; - size_t header_end{lf - 1}; + const size_t part_body_start{part_view.find(PART_BODY_DELIM)}; + if (part_body_start == std::string_view::npos) { + return std::nullopt; + } - if (lf == std::string_view::npos) { - return {std::nullopt, s}; - } + const std::string_view part_headers{part_view.substr(0, part_body_start)}; + const std::string_view part_body{part_view.substr(part_body_start + PART_BODY_DELIM.size())}; + + part part; + for (const auto& header : parse_headers(part_headers)) { + if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) { + if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) { + return std::nullopt; + } + + size_t pos = header.value.find(';'); + std::string_view attributes = header.value.substr(pos + 1, header.value.find('\n') - pos); + kphp::log::info("header.value {}", attributes); + for (auto attribute : parse_attrs(attributes)) { + kphp::log::info("attribute with name {}", attribute.name); + if (attribute.name == "name") { + part.name_attribute = attribute.value; + } else if (attribute.name == "filename") { + part.filename_attribute = attribute.value; + } else { + // ignore unknown attribute + } + } + } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { + part.content_type = header.value; + } else { + // ignore unused header + } + } + part.body = part_body; - if (s[header_end] == '\r') { - header_end--; + return part; } - auto hdr{header::create(s.substr(0, header_end + 1))}; - s = s.substr(lf + 1); - return std::pair, std::string_view>(hdr, s); -} +private: -auto parse_attrs(std::string_view header_value) { - return std::views::split(header_value, ";") | std::views::transform([](auto part) { - std::string_view part_view{vk::trim(std::string_view(part))}; - auto [name_view, value_view]{vk::split_string_view(part_view, '=')}; - if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) { - value_view = value_view.substr(1, value_view.size() - 2); - } - return part_attribute{name_view, value_view}; - }); -} +}; -void MultipartBody::parse_into(mixed& v$_POST, mixed& v$_FILES) { - kphp::log::info("body {}", body); - kphp::log::info("boundary {}", boundary); - auto parts = std::views::split(body, boundary) | std::views::transform([](auto raw_part) noexcept -> std::optional { - kphp::log::info("part of body {}", raw_part); - std::string_view str{raw_part}; - if (!str.ends_with("--")) { - return std::nullopt; - } - // cut "--" before next boundary - str = str.substr(0, str.size() - 2); - // trim \r\n - str = trim_crlf(str); - - part part; - while (true) { - auto [maybe_header, new_str] = parse_next_header(str); - if (!maybe_header) { - break; - } - str = new_str; - header header{maybe_header.value()}; - kphp::log::info("header name {}", header.name); - if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) { - if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) { - return std::nullopt; - } - auto attrs = parse_attrs(header.value); - for (part_attribute a : attrs) { - if (a.attr.empty()) { - return std::nullopt; - } - if (a.attr == "name") { - part.name = a.value; - } else if (a.attr == "filename") { - part.filename = a.value; - } - } - } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { - part.content_type = header.value; - } - } - part.data = str; - return part; - }); - - for (std::optional maybe_part : parts) { - kphp::log::info("process multipart part"); - if (!maybe_part) { - continue; - } - kphp::log::info("process multipart non-null part"); - part p{maybe_part.value()}; - if (p.name.empty()) { - continue; - } - kphp::log::info("process multipart non-empty part"); - if (!p.filename.empty()) { - addFile(p, v$_FILES); - } else { - addPost(p, v$_POST); - } - } +auto parse_parts(std::string_view body, std::string_view boundary) noexcept { + return std::views::split(body, std::views::join(std::array{std::string_view{"--"}, boundary})) | + std::views::filter([](auto raw_part) { return !std::string_view(raw_part).empty(); }) | + std::views::transform([](auto raw_part) noexcept -> std::optional { return part::parse(trim_crlf(std::string_view(raw_part))); }) | + std::views::take_while([](auto part_opt) { return part_opt.has_value(); }) | std::views::transform([](auto part_opt) { return *part_opt; }); } -void MultipartBody::addPost(const part& part, mixed& v$_POST) { - string name{part.name.data(), static_cast(part.name.size())}; - v$_POST.set_value(name, string(part.data.data(), part.data.size())); +void addPost(const part& part, mixed& v$_POST) { + kphp::log::info("addPost"); + string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; + v$_POST.set_value(name, string(part.body.data(), part.body.size())); } -void MultipartBody::addFile(const part& part, mixed& v$_FILES) { - // TODO: replace f$random_bytes to avoid string allocation +void addFile(const part& part, mixed& v$_FILES) { + // TODO: replace f$random_bytes to avoid string allocation Optional rand_str{f$random_bytes(TMP_FILENAME_LENGTH)}; if (!rand_str.has_value()) { @@ -214,24 +186,24 @@ void MultipartBody::addFile(const part& part, mixed& v$_FILES) { } int file_size{0}; - auto file_size_result = (*file).write({reinterpret_cast(part.data.data()), part.data.size()}); + auto file_size_result = (*file).write({reinterpret_cast(part.body.data()), part.body.size()}); if (file_size_result.has_value()) { file_size = file_size_result.value(); - if (file_size < part.data.size()) { - kphp::log::warning("error write to tmp file: wrote {} bytes insted of {}", file_size, part.data.size()); + if (file_size < part.body.size()) { + kphp::log::warning("error write to tmp file: wrote {} bytes insted of {}", file_size, part.body.size()); return; } } else { kphp::log::warning("error write to tmp file: errcode {}", file_size_result.error()); } - string name{part.name.data(), static_cast(part.name.size())}; + string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; - if (part.name.ends_with("[]")) { + if (part.name_attribute.ends_with("[]")) { mixed& file = v$_FILES[name.substr(0, name.size() - 2)]; - if (file_size == part.data.size()) { - file[string("name")].push_back(string(part.filename.data(), part.filename.size())); - file[string("type")].push_back(string(part.content_type.data(), part.content_type.size())); + if (file_size == part.body.size()) { + file[string("name")].push_back(string(part.filename_attribute.value().data(), part.filename_attribute.value().size())); + file[string("type")].push_back(string(part.content_type.value_or("").data(), part.content_type.value_or("").size())); file[string("size")].push_back(file_size); file[string("tmp_name")].push_back(string(tmp_name.data(), tmp_name.size())); file[string("error")].push_back(0); @@ -244,9 +216,9 @@ void MultipartBody::addFile(const part& part, mixed& v$_FILES) { } } else { mixed& file = v$_FILES[name]; - if (file_size == part.data.size()) { - file.set_value(string("name"), string(part.filename.data(), part.filename.size())); - file.set_value(string("type"), string(part.content_type.data(), part.content_type.size())); + if (file_size == part.body.size()) { + file.set_value(string("name"), string(part.filename_attribute.value().data(), part.filename_attribute.value().size())); + file.set_value(string("type"), string(part.content_type.value_or("").data(), part.content_type.value_or("").size())); file.set_value(string("size"), file_size); file.set_value(string("tmp_name"), string(tmp_name.data(), tmp_name.size())); file.set_value(string("error"), 0); @@ -260,11 +232,23 @@ void MultipartBody::addFile(const part& part, mixed& v$_FILES) { } // namespace -namespace kphp::http::multipart { +namespace kphp::http { -void parse_multipart(std::string_view body, std::string_view boundary, mixed& v$_POST, mixed& v$_FILES) { - MultipartBody mb{body, boundary}; - mb.parse_into(v$_POST, v$_FILES); +void process_multipart_content_type(std::string_view body, std::string_view boundary, PhpScriptBuiltInSuperGlobals& superglobals) noexcept { + kphp::log::info("body {}", body); + kphp::log::info("boundary {}", boundary); + for (auto part : parse_parts(body, boundary)) { + kphp::log::info("process multipart name_attribute {}", part.name_attribute); + if (part.name_attribute.empty()) { + continue; + } + + if (part.filename_attribute.has_value()) { + addFile(part, superglobals.v$_FILES); + } else { + addPost(part, superglobals.v$_POST); + } + } } std::optional extract_boundary(std::string_view content_type) noexcept { @@ -280,4 +264,4 @@ std::optional extract_boundary(std::string_view content_type) return res; } -} // namespace kphp::http::multipart +} // namespace kphp::http diff --git a/runtime-light/server/http/multipart.h b/runtime-light/server/http/multipart.h index 88291febde..a6995dee8c 100644 --- a/runtime-light/server/http/multipart.h +++ b/runtime-light/server/http/multipart.h @@ -5,11 +5,11 @@ #include #include -#include "runtime-common/core/runtime-core.h" +#include "runtime-light/core/globals/php-script-globals.h" -namespace kphp::http::multipart { +namespace kphp::http { -void parse_multipart(std::string_view body, std::string_view boundary, mixed& v$_POST, mixed& v$_FILES); +void process_multipart_content_type(std::string_view body, std::string_view boundary, PhpScriptBuiltInSuperGlobals& superglobals) noexcept; std::optional extract_boundary(std::string_view content_type) noexcept; From 84d650584887e2c51644a31f702f40df980d7251 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Thu, 5 Mar 2026 15:25:20 +0300 Subject: [PATCH 05/26] wip --- runtime-light/stdlib/rpc/rpc-api.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime-light/stdlib/rpc/rpc-api.cpp b/runtime-light/stdlib/rpc/rpc-api.cpp index 00a207ed7a..4b26403d9d 100644 --- a/runtime-light/stdlib/rpc/rpc-api.cpp +++ b/runtime-light/stdlib/rpc/rpc-api.cpp @@ -344,7 +344,7 @@ kphp::coro::task send_request(std::string_view actor, std co_return std::move(opt_response); }}; - static constexpr auto ignore_answer_awaiter_coroutine{[](kphp::component::stream stream, std::chrono::milliseconds timeout) -> kphp::coro::shared_task { + static constexpr auto ignore_answer_awaiter_coroutine{[](kphp::component::stream stream, std::chrono::milliseconds timeout) noexcept -> kphp::coro::shared_task { auto fetch_task{kphp::component::fetch_response(stream, [](std::span) noexcept {})}; std::ignore = co_await kphp::coro::io_scheduler::get().schedule(std::move(fetch_task), timeout); }}; From 0fba8b8e7c9baeccd9497dba1a9d3d6f93036e99 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 10:24:29 +0300 Subject: [PATCH 06/26] wip --- runtime-light/server/http/multipart.cpp | 56 +++++++++++++++---------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/runtime-light/server/http/multipart.cpp b/runtime-light/server/http/multipart.cpp index d744e8207b..2aa080ffa4 100644 --- a/runtime-light/server/http/multipart.cpp +++ b/runtime-light/server/http/multipart.cpp @@ -2,15 +2,21 @@ // Copyright (c) 2026 LLC «V Kontakte» // Distributed under the GPL v3 License, see LICENSE.notice.txt +#include +#include +#include #include #include +#include #include #include #include #include #include "common/algorithms/string-algorithms.h" +#include "runtime-common/core/core-types/decl/optional.h" #include "runtime-common/core/runtime-core.h" +#include "runtime-light/core/globals/php-script-globals.h" #include "runtime-light/server/http/http-server-state.h" #include "runtime-light/server/http/multipart.h" #include "runtime-light/stdlib/diagnostics/logs.h" @@ -43,8 +49,8 @@ std::string_view trim_crlf(std::string_view sv) { } struct part_header { - const std::string_view name; - const std::string_view value; + std::string_view name; + std::string_view value; static std::optional parse(std::string_view header) noexcept { auto [name_view, value_view]{vk::split_string_view(header, ':')}; @@ -73,8 +79,8 @@ auto parse_headers(std::string_view sv) noexcept { } struct part_attribute { - const std::string_view name; - const std::string_view value; + std::string_view name; + std::string_view value; static std::optional parse(std::string_view attribute) noexcept { auto [name_view, value_view]{vk::split_string_view(vk::trim(attribute), '=')}; @@ -118,7 +124,10 @@ struct part { const std::string_view part_headers{part_view.substr(0, part_body_start)}; const std::string_view part_body{part_view.substr(part_body_start + PART_BODY_DELIM.size())}; - part part; + std::optional content_type{std::nullopt}; + std::optional filename_attribute{std::nullopt}; + std::optional name_attribute{std::nullopt}; + for (const auto& header : parse_headers(part_headers)) { if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) { if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) { @@ -131,42 +140,47 @@ struct part { for (auto attribute : parse_attrs(attributes)) { kphp::log::info("attribute with name {}", attribute.name); if (attribute.name == "name") { - part.name_attribute = attribute.value; + name_attribute = attribute.value; } else if (attribute.name == "filename") { - part.filename_attribute = attribute.value; + filename_attribute = attribute.value; } else { // ignore unknown attribute } } } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { - part.content_type = header.value; + content_type = header.value; } else { // ignore unused header } } - part.body = part_body; - - return part; + if (!name_attribute.has_value()) { + return std::nullopt; + } + return part(*name_attribute, filename_attribute, content_type, part_body); } private: - + part(std::string_view name_attribute, std::optional filename_attribute, std::optional content_type, + std::string_view body) noexcept + : name_attribute(name_attribute), + filename_attribute(filename_attribute), + content_type(content_type), + body(body) {} }; auto parse_parts(std::string_view body, std::string_view boundary) noexcept { return std::views::split(body, std::views::join(std::array{std::string_view{"--"}, boundary})) | std::views::filter([](auto raw_part) { return !std::string_view(raw_part).empty(); }) | std::views::transform([](auto raw_part) noexcept -> std::optional { return part::parse(trim_crlf(std::string_view(raw_part))); }) | - std::views::take_while([](auto part_opt) { return part_opt.has_value(); }) | std::views::transform([](auto part_opt) { return *part_opt; }); + std::views::take_while([](auto part_opt) noexcept { return part_opt.has_value(); }) | std::views::transform([](auto part_opt) { return *part_opt; }); } -void addPost(const part& part, mixed& v$_POST) { - kphp::log::info("addPost"); +void add_post_part(const part& part, mixed& post) { string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; - v$_POST.set_value(name, string(part.body.data(), part.body.size())); + post.set_value(name, string(part.body.data(), part.body.size())); } -void addFile(const part& part, mixed& v$_FILES) { +void add_file_part(const part& part, mixed& files) { // TODO: replace f$random_bytes to avoid string allocation Optional rand_str{f$random_bytes(TMP_FILENAME_LENGTH)}; @@ -200,7 +214,7 @@ void addFile(const part& part, mixed& v$_FILES) { string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; if (part.name_attribute.ends_with("[]")) { - mixed& file = v$_FILES[name.substr(0, name.size() - 2)]; + mixed& file = files[name.substr(0, name.size() - 2)]; if (file_size == part.body.size()) { file[string("name")].push_back(string(part.filename_attribute.value().data(), part.filename_attribute.value().size())); file[string("type")].push_back(string(part.content_type.value_or("").data(), part.content_type.value_or("").size())); @@ -215,7 +229,7 @@ void addFile(const part& part, mixed& v$_FILES) { file[string("error")].push_back(-file_size); } } else { - mixed& file = v$_FILES[name]; + mixed& file = files[name]; if (file_size == part.body.size()) { file.set_value(string("name"), string(part.filename_attribute.value().data(), part.filename_attribute.value().size())); file.set_value(string("type"), string(part.content_type.value_or("").data(), part.content_type.value_or("").size())); @@ -244,9 +258,9 @@ void process_multipart_content_type(std::string_view body, std::string_view boun } if (part.filename_attribute.has_value()) { - addFile(part, superglobals.v$_FILES); + add_file_part(part, superglobals.v$_FILES); } else { - addPost(part, superglobals.v$_POST); + add_post_part(part, superglobals.v$_POST); } } } From 40923299e3ca0b63b49849a853ba50afa862391a Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 12:57:28 +0300 Subject: [PATCH 07/26] wip --- runtime-light/server/http/init-functions.cpp | 6 +- runtime-light/server/http/multipart.cpp | 281 ------------------ runtime-light/server/http/multipart.h | 16 - .../http/multipart/details/parts-parsing.h | 169 +++++++++++ .../multipart/details/parts-processing.cpp | 114 +++++++ .../http/multipart/details/parts-processing.h | 16 + .../server/http/multipart/multipart.h | 41 +++ runtime-light/server/server.cmake | 2 +- 8 files changed, 344 insertions(+), 301 deletions(-) delete mode 100644 runtime-light/server/http/multipart.cpp delete mode 100644 runtime-light/server/http/multipart.h create mode 100644 runtime-light/server/http/multipart/details/parts-parsing.h create mode 100644 runtime-light/server/http/multipart/details/parts-processing.cpp create mode 100644 runtime-light/server/http/multipart/details/parts-processing.h create mode 100644 runtime-light/server/http/multipart/multipart.h diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 477aa02006..0c8b971a15 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -26,7 +26,7 @@ #include "runtime-light/core/globals/php-script-globals.h" #include "runtime-light/k2-platform/k2-api.h" #include "runtime-light/server/http/http-server-state.h" -#include "runtime-light/server/http/multipart.h" +#include "runtime-light/server/http/multipart/multipart.h" #include "runtime-light/state/instance-state.h" #include "runtime-light/stdlib/component/component-api.h" #include "runtime-light/stdlib/diagnostics/logs.h" @@ -327,9 +327,9 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/algorithms/string-algorithms.h" -#include "runtime-common/core/core-types/decl/optional.h" -#include "runtime-common/core/runtime-core.h" -#include "runtime-light/core/globals/php-script-globals.h" -#include "runtime-light/server/http/http-server-state.h" -#include "runtime-light/server/http/multipart.h" -#include "runtime-light/stdlib/diagnostics/logs.h" -#include "runtime-light/stdlib/file/resource.h" -#include "runtime-light/stdlib/math/random-functions.h" - -namespace { - -constexpr int8_t TMP_FILENAME_LENGTH = 10; -constexpr std::string_view TMP_DIR = "/tmp/"; - -constexpr std::string_view HEADER_CONTENT_DISPOSITION_FORM_DATA = "form-data;"; -constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary="; - -std::string_view trim_crlf(std::string_view sv) { - if (sv.starts_with('\r')) { - sv.remove_prefix(1); - } - if (sv.starts_with('\n')) { - sv.remove_prefix(1); - } - - if (sv.ends_with('\n')) { - sv.remove_suffix(1); - } - if (sv.ends_with('\r')) { - sv.remove_suffix(1); - } - return sv; -} - -struct part_header { - std::string_view name; - std::string_view value; - - static std::optional parse(std::string_view header) noexcept { - auto [name_view, value_view]{vk::split_string_view(header, ':')}; - if (name_view.empty() || value_view.empty()) { - return std::nullopt; - } - return part_header{name_view, value_view.substr(1)}; - } - - bool name_is(std::string_view header_name) const noexcept { - const auto lower_name{name | std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; - const auto lower_header_name{header_name | std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; - return std::ranges::equal(lower_name, lower_header_name); - } - -private: - part_header(std::string_view name, std::string_view value) noexcept - : name(name), - value(value) {} -}; - -auto parse_headers(std::string_view sv) noexcept { - static constexpr std::string_view DELIM = "\r\n"; - return std::views::split(sv, DELIM) | std::views::transform([](auto raw_header) { return part_header::parse(std::string_view(raw_header)); }) | - std::views::take_while([](auto header_opt) { return header_opt.has_value(); }) | std::views::transform([](auto header_opt) { return *header_opt; }); -} - -struct part_attribute { - std::string_view name; - std::string_view value; - - static std::optional parse(std::string_view attribute) noexcept { - auto [name_view, value_view]{vk::split_string_view(vk::trim(attribute), '=')}; - if (name_view.empty() || value_view.empty()) { - return std::nullopt; - } - // todo assert "value" - if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) { - value_view = value_view.substr(1, value_view.size() - 2); - } - return part_attribute{name_view, value_view}; - } - -private: - part_attribute(std::string_view name, std::string_view value) noexcept - : name(name), - value(value) {} -}; - -auto parse_attrs(std::string_view header_value) noexcept { - static constexpr std::string_view DELIM = ";"; - return std::views::split(header_value, DELIM) | std::views::transform([](auto part) { return part_attribute::parse(std::string_view(part)); }) | - std::views::take_while([](auto attribute_opt) { return attribute_opt.has_value(); }) | - std::views::transform([](auto attribute_opt) { return *attribute_opt; }); -} - -struct part { - std::string_view name_attribute; - std::optional filename_attribute; - std::optional content_type; - std::string_view body; - - static std::optional parse(std::string_view part_view) { - static constexpr std::string_view PART_BODY_DELIM = "\r\n\r\n"; - - const size_t part_body_start{part_view.find(PART_BODY_DELIM)}; - if (part_body_start == std::string_view::npos) { - return std::nullopt; - } - - const std::string_view part_headers{part_view.substr(0, part_body_start)}; - const std::string_view part_body{part_view.substr(part_body_start + PART_BODY_DELIM.size())}; - - std::optional content_type{std::nullopt}; - std::optional filename_attribute{std::nullopt}; - std::optional name_attribute{std::nullopt}; - - for (const auto& header : parse_headers(part_headers)) { - if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) { - if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) { - return std::nullopt; - } - - size_t pos = header.value.find(';'); - std::string_view attributes = header.value.substr(pos + 1, header.value.find('\n') - pos); - kphp::log::info("header.value {}", attributes); - for (auto attribute : parse_attrs(attributes)) { - kphp::log::info("attribute with name {}", attribute.name); - if (attribute.name == "name") { - name_attribute = attribute.value; - } else if (attribute.name == "filename") { - filename_attribute = attribute.value; - } else { - // ignore unknown attribute - } - } - } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { - content_type = header.value; - } else { - // ignore unused header - } - } - if (!name_attribute.has_value()) { - return std::nullopt; - } - return part(*name_attribute, filename_attribute, content_type, part_body); - } - -private: - part(std::string_view name_attribute, std::optional filename_attribute, std::optional content_type, - std::string_view body) noexcept - : name_attribute(name_attribute), - filename_attribute(filename_attribute), - content_type(content_type), - body(body) {} -}; - -auto parse_parts(std::string_view body, std::string_view boundary) noexcept { - return std::views::split(body, std::views::join(std::array{std::string_view{"--"}, boundary})) | - std::views::filter([](auto raw_part) { return !std::string_view(raw_part).empty(); }) | - std::views::transform([](auto raw_part) noexcept -> std::optional { return part::parse(trim_crlf(std::string_view(raw_part))); }) | - std::views::take_while([](auto part_opt) noexcept { return part_opt.has_value(); }) | std::views::transform([](auto part_opt) { return *part_opt; }); -} - -void add_post_part(const part& part, mixed& post) { - string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; - post.set_value(name, string(part.body.data(), part.body.size())); -} - -void add_file_part(const part& part, mixed& files) { - // TODO: replace f$random_bytes to avoid string allocation - Optional rand_str{f$random_bytes(TMP_FILENAME_LENGTH)}; - - if (!rand_str.has_value()) { - kphp::log::warning("error generating random_bytes for tmp file"); - return; - } - - string tmp_name_str{TMP_DIR.data(), TMP_DIR.size()}; - tmp_name_str.append(rand_str.val()); - std::string_view tmp_name{tmp_name_str.c_str(), tmp_name_str.size()}; - - auto file{kphp::fs::file::open(tmp_name, "w")}; - if (!file) [[unlikely]] { - kphp::log::warning("error opening tmp file {}: error code -> {}", tmp_name, file.error()); - return; - } - - int file_size{0}; - auto file_size_result = (*file).write({reinterpret_cast(part.body.data()), part.body.size()}); - if (file_size_result.has_value()) { - file_size = file_size_result.value(); - if (file_size < part.body.size()) { - kphp::log::warning("error write to tmp file: wrote {} bytes insted of {}", file_size, part.body.size()); - return; - } - } else { - kphp::log::warning("error write to tmp file: errcode {}", file_size_result.error()); - } - - string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; - - if (part.name_attribute.ends_with("[]")) { - mixed& file = files[name.substr(0, name.size() - 2)]; - if (file_size == part.body.size()) { - file[string("name")].push_back(string(part.filename_attribute.value().data(), part.filename_attribute.value().size())); - file[string("type")].push_back(string(part.content_type.value_or("").data(), part.content_type.value_or("").size())); - file[string("size")].push_back(file_size); - file[string("tmp_name")].push_back(string(tmp_name.data(), tmp_name.size())); - file[string("error")].push_back(0); - } else { - file[string("name")].push_back(string()); - file[string("type")].push_back(string()); - file[string("size")].push_back(0); - file[string("tmp_name")].push_back(string()); - file[string("error")].push_back(-file_size); - } - } else { - mixed& file = files[name]; - if (file_size == part.body.size()) { - file.set_value(string("name"), string(part.filename_attribute.value().data(), part.filename_attribute.value().size())); - file.set_value(string("type"), string(part.content_type.value_or("").data(), part.content_type.value_or("").size())); - file.set_value(string("size"), file_size); - file.set_value(string("tmp_name"), string(tmp_name.data(), tmp_name.size())); - file.set_value(string("error"), 0); - } else { - file.set_value(string("size"), 0); - file.set_value(string("tmp_name"), string()); - file.set_value(string("error"), -file_size); - } - } -} - -} // namespace - -namespace kphp::http { - -void process_multipart_content_type(std::string_view body, std::string_view boundary, PhpScriptBuiltInSuperGlobals& superglobals) noexcept { - kphp::log::info("body {}", body); - kphp::log::info("boundary {}", boundary); - for (auto part : parse_parts(body, boundary)) { - kphp::log::info("process multipart name_attribute {}", part.name_attribute); - if (part.name_attribute.empty()) { - continue; - } - - if (part.filename_attribute.has_value()) { - add_file_part(part, superglobals.v$_FILES); - } else { - add_post_part(part, superglobals.v$_POST); - } - } -} - -std::optional extract_boundary(std::string_view content_type) noexcept { - size_t pos{content_type.find(MULTIPART_BOUNDARY_EQ)}; - if (pos == std::string_view::npos) { - return std::nullopt; - } - // todo assert "body" - std::string_view res{content_type.substr(pos + MULTIPART_BOUNDARY_EQ.size())}; - if (res.size() >= 2 && res.starts_with('"') && res.ends_with('"')) { - res = res.substr(1, res.size() - 2); - } - return res; -} - -} // namespace kphp::http diff --git a/runtime-light/server/http/multipart.h b/runtime-light/server/http/multipart.h deleted file mode 100644 index a6995dee8c..0000000000 --- a/runtime-light/server/http/multipart.h +++ /dev/null @@ -1,16 +0,0 @@ -// Compiler for PHP (aka KPHP) -// Copyright (c) 2026 LLC «V Kontakte» -// Distributed under the GPL v3 License, see LICENSE.notice.txt - -#include -#include - -#include "runtime-light/core/globals/php-script-globals.h" - -namespace kphp::http { - -void process_multipart_content_type(std::string_view body, std::string_view boundary, PhpScriptBuiltInSuperGlobals& superglobals) noexcept; - -std::optional extract_boundary(std::string_view content_type) noexcept; - -} // namespace kphp::http::multipart diff --git a/runtime-light/server/http/multipart/details/parts-parsing.h b/runtime-light/server/http/multipart/details/parts-parsing.h new file mode 100644 index 0000000000..5190c72f71 --- /dev/null +++ b/runtime-light/server/http/multipart/details/parts-parsing.h @@ -0,0 +1,169 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2026 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/algorithms/string-algorithms.h" +#include "runtime-light/server/http/http-server-state.h" + +namespace kphp::http::multipart::details { + +constexpr std::string_view HEADER_CONTENT_DISPOSITION_FORM_DATA = "form-data;"; + +inline std::string_view trim_crlf(std::string_view sv) noexcept { + if (sv.starts_with('\r')) { + sv.remove_prefix(1); + } + if (sv.starts_with('\n')) { + sv.remove_prefix(1); + } + + if (sv.ends_with('\n')) { + sv.remove_suffix(1); + } + if (sv.ends_with('\r')) { + sv.remove_suffix(1); + } + return sv; +} + +struct part_header { + std::string_view name; + std::string_view value; + + static std::optional parse(std::string_view header) noexcept { + auto [name_view, value_view]{vk::split_string_view(header, ':')}; + if (name_view.empty() || value_view.empty()) { + return std::nullopt; + } + return part_header{name_view, value_view.substr(1)}; + } + + bool name_is(std::string_view header_name) const noexcept { + const auto lower_name{name | std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; + const auto lower_header_name{header_name | std::views::transform([](auto c) noexcept { return std::tolower(c, std::locale::classic()); })}; + return std::ranges::equal(lower_name, lower_header_name); + } + +private: + part_header(std::string_view name, std::string_view value) noexcept + : name(name), + value(value) {} +}; + +inline auto parse_headers(std::string_view sv) noexcept { + static constexpr std::string_view DELIM = "\r\n"; + return std::views::split(sv, DELIM) | std::views::transform([](auto raw_header) noexcept { return part_header::parse(std::string_view(raw_header)); }) | + std::views::take_while([](auto header_opt) noexcept { return header_opt.has_value(); }) | + std::views::transform([](auto header_opt) noexcept { return *header_opt; }); +} + +struct part_attribute { + std::string_view name; + std::string_view value; + + static std::optional parse(std::string_view attribute) noexcept { + auto [name_view, value_view]{vk::split_string_view(vk::trim(attribute), '=')}; + if (name_view.empty() || value_view.empty()) { + return std::nullopt; + } + + if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) { + value_view = value_view.substr(1, value_view.size() - 2); + } + return part_attribute{name_view, value_view}; + } + +private: + part_attribute(std::string_view name, std::string_view value) noexcept + : name(name), + value(value) {} +}; + +inline auto parse_attrs(std::string_view header_value) noexcept { + static constexpr std::string_view DELIM = ";"; + return std::views::split(header_value, DELIM) | std::views::transform([](auto part) noexcept { return part_attribute::parse(std::string_view(part)); }) | + std::views::take_while([](auto attribute_opt) noexcept { return attribute_opt.has_value(); }) | + std::views::transform([](auto attribute_opt) noexcept { return *attribute_opt; }); +} + +struct part { + std::string_view name_attribute; + std::optional filename_attribute; + std::optional content_type; + std::string_view body; + + static std::optional parse(std::string_view part_view) noexcept { + static constexpr std::string_view PART_BODY_DELIM = "\r\n\r\n"; + + const size_t part_body_start{part_view.find(PART_BODY_DELIM)}; + if (part_body_start == std::string_view::npos) { + return std::nullopt; + } + + const std::string_view part_headers{part_view.substr(0, part_body_start)}; + const std::string_view part_body{part_view.substr(part_body_start + PART_BODY_DELIM.size())}; + + std::optional content_type{std::nullopt}; + std::optional filename_attribute{std::nullopt}; + std::optional name_attribute{std::nullopt}; + + for (const auto& header : parse_headers(part_headers)) { + if (header.name_is(kphp::http::headers::CONTENT_DISPOSITION)) { + if (!header.value.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) { + return std::nullopt; + } + + size_t pos = header.value.find(';'); + std::string_view attributes = header.value.substr(pos + 1, header.value.find('\n') - pos); + kphp::log::info("header.value {}", attributes); + for (auto attribute : parse_attrs(attributes)) { + kphp::log::info("attribute with name {}", attribute.name); + if (attribute.name == "name") { + name_attribute = attribute.value; + } else if (attribute.name == "filename") { + filename_attribute = attribute.value; + } else { + // ignore unknown attribute + } + } + } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { + content_type = header.value; + } else { + // ignore unused header + } + } + if (!name_attribute.has_value() || name_attribute->empty()) { + return std::nullopt; + } + return part(*name_attribute, filename_attribute, content_type, part_body); + } + +private: + part(std::string_view name_attribute, std::optional filename_attribute, std::optional content_type, + std::string_view body) noexcept + : name_attribute(name_attribute), + filename_attribute(filename_attribute), + content_type(content_type), + body(body) {} +}; + +inline auto parse_multipart_parts(std::string_view body, std::string_view boundary) noexcept { + return std::views::split(body, std::views::join(std::array{std::string_view{"--"}, boundary})) | + std::views::filter([](auto raw_part) noexcept { return !std::string_view(raw_part).empty(); }) | + std::views::transform([](auto raw_part) noexcept -> std::optional { return part::parse(trim_crlf(std::string_view(raw_part))); }) | + std::views::take_while([](auto part_opt) noexcept { return part_opt.has_value(); }) | std::views::transform([](auto part_opt) { return *part_opt; }); +} + +} // namespace kphp::http::multipart::details diff --git a/runtime-light/server/http/multipart/details/parts-processing.cpp b/runtime-light/server/http/multipart/details/parts-processing.cpp new file mode 100644 index 0000000000..39d037b4cb --- /dev/null +++ b/runtime-light/server/http/multipart/details/parts-processing.cpp @@ -0,0 +1,114 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2026 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#include "runtime-light/server/http/multipart/details/parts-processing.h" + +#include +#include + +#include "runtime-common/core/core-types/decl/optional.h" +#include "runtime-common/core/runtime-core.h" +#include "runtime-common/stdlib/server/url-functions.h" +#include "runtime-light/server/http/multipart/details/parts-parsing.h" +#include "runtime-light/stdlib/file/resource.h" +#include "runtime-light/stdlib/math/random-functions.h" + +namespace { + +constexpr std::string_view CONTENT_TYPE_APP_FORM_URLENCODED = "application/x-www-form-urlencoded"; + +constexpr int8_t TMP_FILENAME_LENGTH = 10; +constexpr std::string_view TMP_DIR = "/tmp/"; + +constexpr std::string_view DEFAULT_CONTENT_TYPE = "text/plain"; + +constexpr int32_t UPLOAD_ERR_OK = 0; +constexpr int32_t UPLOAD_ERR_INI_SIZE = 1; // unused in kphp +constexpr int32_t UPLOAD_ERR_FORM_SIZE = 2; // todo support header max-file-size +constexpr int32_t UPLOAD_ERR_PARTIAL = 3; +constexpr int32_t UPLOAD_ERR_NO_FILE = 4; +constexpr int32_t UPLOAD_ERR_NO_TMP_DIR = 6; // todo support check tmp dir +constexpr int32_t UPLOAD_ERR_CANT_WRITE = 7; +constexpr int32_t UPLOAD_ERR_EXTENSION = 8; // unused in kphp + +} // namespace + +namespace kphp::http::multipart::details { + +void process_post_multipart(const kphp::http::multipart::details::part& part, mixed& post) noexcept { + const string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; + const string body{part.body.data(), static_cast(part.body.size())}; + if (part.content_type.has_value() && (*part.content_type) == CONTENT_TYPE_APP_FORM_URLENCODED) { + f$parse_str(body, post[name]); + } else { + post.set_value(name, string(part.body.data(), part.body.size())); + } +} + +void process_upload_multipart(const kphp::http::multipart::details::part& part, mixed& files) noexcept { + // TODO: replace f$random_bytes to avoid string allocation + Optional rand_str{f$random_bytes(TMP_FILENAME_LENGTH)}; + + if (!rand_str.has_value()) [[unlikely]] { + // kphp::log::warning("error generating random_bytes for tmp file"); + return; + } + + string tmp_name_str{TMP_DIR.data(), TMP_DIR.size()}; + tmp_name_str.append(rand_str.val()); + std::string_view tmp_name{tmp_name_str.c_str(), tmp_name_str.size()}; + + auto file_res{kphp::fs::file::open(tmp_name, "w")}; + int32_t error_code{UPLOAD_ERR_OK}; + size_t file_size{}; + if (file_res.has_value()) { + const auto written_res{(*file_res).write({reinterpret_cast(part.body.data()), part.body.size()})}; + if (written_res.has_value()) { + file_size = *written_res; + if (file_size < part.body.size()) { + error_code = UPLOAD_ERR_PARTIAL; + } + } else { + *file_res->close(); + error_code = UPLOAD_ERR_CANT_WRITE; + } + + } else { + error_code = UPLOAD_ERR_NO_FILE; + } + + kphp::log::assertion(part.filename_attribute.has_value()); + + const string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; + if (part.name_attribute.ends_with("[]")) { + mixed& file = files[name.substr(0, name.size() - 2)]; + if (error_code != UPLOAD_ERR_OK) { + file[string("name")].push_back(string()); + file[string("type")].push_back(string()); + file[string("size")].push_back(0); + file[string("tmp_name")].push_back(string()); + file[string("error")].push_back(error_code); + } else { + file[string("name")].push_back(string((*part.filename_attribute).data(), (*part.filename_attribute).size())); + file[string("type")].push_back(string(part.content_type.value_or(DEFAULT_CONTENT_TYPE).data(), part.content_type.value_or(DEFAULT_CONTENT_TYPE).size())); + file[string("size")].push_back(static_cast(file_size)); + file[string("tmp_name")].push_back(string(tmp_name.data(), tmp_name.size())); + file[string("error")].push_back(0); + } + } else { + mixed& file = files[name]; + if (error_code != UPLOAD_ERR_OK) { + file.set_value(string("size"), 0); + file.set_value(string("tmp_name"), string()); + file.set_value(string("error"), error_code); + } else { + file.set_value(string("name"), string((*part.filename_attribute).data(), (*part.filename_attribute).size())); + file.set_value(string("type"), string(part.content_type.value_or(DEFAULT_CONTENT_TYPE).data(), part.content_type.value_or(DEFAULT_CONTENT_TYPE).size())); + file.set_value(string("size"), static_cast(file_size)); + file.set_value(string("tmp_name"), string(tmp_name.data(), tmp_name.size())); + file.set_value(string("error"), 0); + } + } +} +} // namespace kphp::http::multipart::details diff --git a/runtime-light/server/http/multipart/details/parts-processing.h b/runtime-light/server/http/multipart/details/parts-processing.h new file mode 100644 index 0000000000..f7f8768e60 --- /dev/null +++ b/runtime-light/server/http/multipart/details/parts-processing.h @@ -0,0 +1,16 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2026 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include "runtime-common/core/runtime-core.h" +#include "runtime-light/server/http/multipart/details/parts-parsing.h" + +namespace kphp::http::multipart::details { + +void process_post_multipart(const kphp::http::multipart::details::part& part, mixed& post) noexcept; + +void process_upload_multipart(const kphp::http::multipart::details::part& part, mixed& files) noexcept; + +} // namespace kphp::http::multipart::details diff --git a/runtime-light/server/http/multipart/multipart.h b/runtime-light/server/http/multipart/multipart.h new file mode 100644 index 0000000000..c80899c0ef --- /dev/null +++ b/runtime-light/server/http/multipart/multipart.h @@ -0,0 +1,41 @@ +// Compiler for PHP (aka KPHP) +// Copyright (c) 2026 LLC «V Kontakte» +// Distributed under the GPL v3 License, see LICENSE.notice.txt + +#pragma once + +#include +#include + +#include "runtime-light/server/http/multipart/details/parts-parsing.h" +#include "runtime-light/server/http/multipart/details/parts-processing.h" + +namespace kphp::http::multipart { + +constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary="; + +inline void process_multipart_content_type(std::string_view body, std::string_view boundary, PhpScriptBuiltInSuperGlobals& superglobals) noexcept { + for (auto part : details::parse_multipart_parts(body, boundary)) { + kphp::log::info("process multipart name_attribute {}", part.name_attribute); + if (part.filename_attribute.has_value()) { + details::process_upload_multipart(part, superglobals.v$_FILES); + } else { + details::process_post_multipart(part, superglobals.v$_POST); + } + } +} + +inline std::optional extract_boundary(std::string_view content_type) noexcept { + size_t pos{content_type.find(MULTIPART_BOUNDARY_EQ)}; + if (pos == std::string_view::npos) { + return std::nullopt; + } + // todo assert "body" + std::string_view res{content_type.substr(pos + MULTIPART_BOUNDARY_EQ.size())}; + if (res.size() >= 2 && res.starts_with('"') && res.ends_with('"')) { + res = res.substr(1, res.size() - 2); + } + return res; +} + +} // namespace kphp::http::multipart diff --git a/runtime-light/server/server.cmake b/runtime-light/server/server.cmake index 5b37757b34..ff4ef042e7 100644 --- a/runtime-light/server/server.cmake +++ b/runtime-light/server/server.cmake @@ -3,7 +3,7 @@ prepend( server/ cli/cli-instance-state.cpp http/init-functions.cpp - http/multipart.cpp + http/multipart/details/parts-processing.cpp http/http-server-state.cpp job-worker/job-worker-server-state.cpp rpc/init-functions.cpp From 2c4f0210618f6d5194868c60dbee9f8a0fbfcf44 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 13:10:22 +0300 Subject: [PATCH 08/26] wip --- .../server/http/multipart/details/parts-parsing.h | 15 +++++++++------ .../http/multipart/details/parts-processing.cpp | 9 +++++---- runtime-light/server/http/multipart/multipart.h | 5 ++--- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/runtime-light/server/http/multipart/details/parts-parsing.h b/runtime-light/server/http/multipart/details/parts-parsing.h index 5190c72f71..76927f343f 100644 --- a/runtime-light/server/http/multipart/details/parts-parsing.h +++ b/runtime-light/server/http/multipart/details/parts-parsing.h @@ -125,11 +125,14 @@ struct part { return std::nullopt; } - size_t pos = header.value.find(';'); - std::string_view attributes = header.value.substr(pos + 1, header.value.find('\n') - pos); - kphp::log::info("header.value {}", attributes); - for (auto attribute : parse_attrs(attributes)) { - kphp::log::info("attribute with name {}", attribute.name); + // skip first Content-Disposition: form-data; + const size_t pos{header.value.find(';')}; + if (pos == std::string::npos) { + return std::nullopt; + } + + const std::string_view attributes{trim_crlf(header.value).substr(pos + 1)}; + for (const auto& attribute : parse_attrs(attributes)) { if (attribute.name == "name") { name_attribute = attribute.value; } else if (attribute.name == "filename") { @@ -139,7 +142,7 @@ struct part { } } } else if (header.name_is(kphp::http::headers::CONTENT_TYPE)) { - content_type = header.value; + content_type = trim_crlf(header.value); } else { // ignore unused header } diff --git a/runtime-light/server/http/multipart/details/parts-processing.cpp b/runtime-light/server/http/multipart/details/parts-processing.cpp index 39d037b4cb..de97a166f1 100644 --- a/runtime-light/server/http/multipart/details/parts-processing.cpp +++ b/runtime-light/server/http/multipart/details/parts-processing.cpp @@ -11,6 +11,7 @@ #include "runtime-common/core/runtime-core.h" #include "runtime-common/stdlib/server/url-functions.h" #include "runtime-light/server/http/multipart/details/parts-parsing.h" +#include "runtime-light/stdlib/diagnostics/logs.h" #include "runtime-light/stdlib/file/resource.h" #include "runtime-light/stdlib/math/random-functions.h" @@ -24,13 +25,13 @@ constexpr std::string_view TMP_DIR = "/tmp/"; constexpr std::string_view DEFAULT_CONTENT_TYPE = "text/plain"; constexpr int32_t UPLOAD_ERR_OK = 0; -constexpr int32_t UPLOAD_ERR_INI_SIZE = 1; // unused in kphp -constexpr int32_t UPLOAD_ERR_FORM_SIZE = 2; // todo support header max-file-size +// constexpr int32_t UPLOAD_ERR_INI_SIZE = 1; // unused in kphp +// constexpr int32_t UPLOAD_ERR_FORM_SIZE = 2; // todo support header max-file-size constexpr int32_t UPLOAD_ERR_PARTIAL = 3; constexpr int32_t UPLOAD_ERR_NO_FILE = 4; -constexpr int32_t UPLOAD_ERR_NO_TMP_DIR = 6; // todo support check tmp dir +// constexpr int32_t UPLOAD_ERR_NO_TMP_DIR = 6; // todo support check tmp dir constexpr int32_t UPLOAD_ERR_CANT_WRITE = 7; -constexpr int32_t UPLOAD_ERR_EXTENSION = 8; // unused in kphp +// constexpr int32_t UPLOAD_ERR_EXTENSION = 8; // unused in kphp } // namespace diff --git a/runtime-light/server/http/multipart/multipart.h b/runtime-light/server/http/multipart/multipart.h index c80899c0ef..9f18cb6879 100644 --- a/runtime-light/server/http/multipart/multipart.h +++ b/runtime-light/server/http/multipart/multipart.h @@ -15,8 +15,7 @@ namespace kphp::http::multipart { constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary="; inline void process_multipart_content_type(std::string_view body, std::string_view boundary, PhpScriptBuiltInSuperGlobals& superglobals) noexcept { - for (auto part : details::parse_multipart_parts(body, boundary)) { - kphp::log::info("process multipart name_attribute {}", part.name_attribute); + for (const auto& part : details::parse_multipart_parts(body, boundary)) { if (part.filename_attribute.has_value()) { details::process_upload_multipart(part, superglobals.v$_FILES); } else { @@ -26,7 +25,7 @@ inline void process_multipart_content_type(std::string_view body, std::string_vi } inline std::optional extract_boundary(std::string_view content_type) noexcept { - size_t pos{content_type.find(MULTIPART_BOUNDARY_EQ)}; + const size_t pos{content_type.find(MULTIPART_BOUNDARY_EQ)}; if (pos == std::string_view::npos) { return std::nullopt; } From 0f5f1b88b76fb3bec4f7b313f87bdd575caed429 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 16:23:01 +0300 Subject: [PATCH 09/26] add tests --- tests/python/tests/http_server/php/index.php | 31 ++++ .../tests/http_server/test_multipart.py | 144 ++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 tests/python/tests/http_server/test_multipart.py diff --git a/tests/python/tests/http_server/php/index.php b/tests/python/tests/http_server/php/index.php index d6ad319bfc..67ec91d289 100644 --- a/tests/python/tests/http_server/php/index.php +++ b/tests/python/tests/http_server/php/index.php @@ -237,6 +237,37 @@ public function work(string $output) { }); break; } +} else if ($_SERVER["PHP_SELF"] === "/test_multipart") { + switch($_GET["type"]) { + case "simple_names_attributes": + echo "name : " . $_POST["name"] . "\n"; + echo "role : " . $_POST["role"] . "\n"; + break; + case "simple_file_attribute": + echo "filename : " . $_FILES["file"]['name'] . "\n"; + $tmp_name = $_FILES["file"]['tmp_name']; + $file_first_line = file($tmp_name)[0]; + echo "content : " . $file_first_line; + break; + case "file_array_attribute": + $files = $_FILES["files"]; + $first_file = $files["tmp_name"][0]; + $file_first_line = file($first_file)[0]; + echo "content-1 : " . $file_first_line; + + $second_file = $files["tmp_name"][1]; + $file_first_line = file($second_file)[0]; + echo "content-2 : " . $file_first_line; + break; + case "name_urlencoded_attribute": + echo $_POST["form"]['name'] . "\n"; + echo $_POST["form"]['note'] . "\n"; + break; + default: + echo "ERROR"; + return; + } + echo "OK"; } else if ($_SERVER["PHP_SELF"] === "/test_ignore_user_abort") { register_shutdown_function('shutdown_function'); /** @var I */ diff --git a/tests/python/tests/http_server/test_multipart.py b/tests/python/tests/http_server/test_multipart.py new file mode 100644 index 0000000000..5413618f93 --- /dev/null +++ b/tests/python/tests/http_server/test_multipart.py @@ -0,0 +1,144 @@ +from urllib.parse import urlencode + +from python.lib.testcase import WebServerAutoTestCase + + +class TestMultipartContentType(WebServerAutoTestCase): + + def test_multipart_name_attributes(self): + boundary = "------------------------d74496d66958873e" + + data = (f"--{boundary}\r\n" + 'Content-Disposition: form-data; name="name"\r\n' + "\r\n" + "Ivan\r\n" + f"--{boundary}\r\n" + 'Content-Disposition: form-data; name="role"\r\n' + "\r\n" + "admin\r\n" + f"--{boundary}--\r\n" + ).encode("utf-8") + + headers = { + "Accept": "*/*", + "Content-Type": f"multipart/form-data; boundary={boundary}", + "Content-Length": str(len(data)), # keep if http_request doesn't auto-set it + } + + response = self.web_server.http_request( + uri="/test_multipart?type=simple_names_attributes", + method="POST", + headers=headers, + data=data, # body goes here + ) + + self.assertEqual(200, response.status_code) + self.assertTrue(response.content.find(b"name : Ivan") != -1) + self.assertTrue(response.content.find(b"role : admin") != -1) + + def test_multipart_filename_attribute(self): + boundary = "------------------------d74496d66958873e" + + file_bytes = b"Hello from test.txt\nSecond line\n" + + data = (f"--{boundary}\r\n" + 'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + "Content-Type: text/plain\r\n" + "\r\n" + ).encode("utf-8") + file_bytes + ( + "\r\n" + f"--{boundary}--\r\n" + ).encode("utf-8") + + headers = { + "Accept": "*/*", + "Content-Type": f"multipart/form-data; boundary={boundary}", + "Content-Length": str(len(data)), + } + + response = self.web_server.http_request( + uri="/test_multipart?type=simple_file_attribute", + method="POST", + headers=headers, + data=data, + ) + + self.assertEqual(200, response.status_code) + self.assertTrue(response.content.find(b"filename : test.txt") != -1) + self.assertTrue(response.content.find(b"Hello from test.txt") != -1) + + def test_multipart_filename_array_attribute(self): + boundary = "------------------------d74496d66958873e" + + # Two "files" (their raw bytes) + file1_name = "a.txt" + file1_bytes = b"Hello from a.txt\n" + + file2_name = "b.txt" + file2_bytes = b"Hello from b.txt\n" + + # Array-style field name: files[] + data = (f"--{boundary}\r\n" + f'Content-Disposition: form-data; name="files[]"; filename="{file1_name}"\r\n' + "Content-Type: text/plain\r\n" + "\r\n" + ).encode("utf-8") + file1_bytes + ( + "\r\n" + f"--{boundary}\r\n" + f'Content-Disposition: form-data; name="files[]"; filename="{file2_name}"\r\n' + "Content-Type: text/plain\r\n" + "\r\n" + ).encode("utf-8") + file2_bytes + ( + "\r\n" + f"--{boundary}--\r\n" + ).encode("utf-8") + + headers = { + "Accept": "*/*", + "Content-Type": f"multipart/form-data; boundary={boundary}", + "Content-Length": str(len(data)), + } + + response = self.web_server.http_request( + uri="/test_multipart?type=file_array_attribute", + method="POST", + headers=headers, + data=data, + ) + + self.assertEqual(200, response.status_code) + self.assertTrue(response.content.find(b"Hello from a.txt") != -1) + self.assertTrue(response.content.find(b"Hello from b.txt") != -1) + + def test_multipart_name_urlencoded_attribute(self): + boundary = "------------------------d74496d66958873e" + + # Part with explicit Content-Type: application/x-www-form-urlencoded + # (this is still multipart/form-data overall; only this part is urlencoded) + urlencoded_part = b"name=Ivan+Petrov&role=admin¬e=a%26b%3Dc%25" + + data = (f"--{boundary}\r\n" + 'Content-Disposition: form-data; name="form"\r\n' + "Content-Type: application/x-www-form-urlencoded; charset=UTF-8\r\n" + "\r\n" + ).encode("utf-8") + urlencoded_part + ( + "\r\n" + f"--{boundary}--\r\n" + ).encode("utf-8") + + headers = { + "Accept": "*/*", + "Content-Type": f"multipart/form-data; boundary={boundary}", + "Content-Length": str(len(data)), # omit if your http_request sets it + } + + response = self.web_server.http_request( + uri="/test_multipart?type=name_urlencoded_attribute", + method="POST", + headers=headers, + data=data, # raw body bytes + ) + + self.assertEqual(200, response.status_code) + self.assertTrue(response.content.find(b"Ivan Petrov") != -1) + self.assertTrue(response.content.find(b"a&b=c%") != -1) From 3f5932aaf10b12ad60cf588ec9a4c80d510fcf24 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 16:23:05 +0300 Subject: [PATCH 10/26] small fix --- .../server/http/multipart/details/parts-processing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime-light/server/http/multipart/details/parts-processing.cpp b/runtime-light/server/http/multipart/details/parts-processing.cpp index de97a166f1..573657ee5f 100644 --- a/runtime-light/server/http/multipart/details/parts-processing.cpp +++ b/runtime-light/server/http/multipart/details/parts-processing.cpp @@ -40,7 +40,7 @@ namespace kphp::http::multipart::details { void process_post_multipart(const kphp::http::multipart::details::part& part, mixed& post) noexcept { const string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; const string body{part.body.data(), static_cast(part.body.size())}; - if (part.content_type.has_value() && (*part.content_type) == CONTENT_TYPE_APP_FORM_URLENCODED) { + if (part.content_type.has_value() && !std::ranges::search(*part.content_type, CONTENT_TYPE_APP_FORM_URLENCODED).empty()) { f$parse_str(body, post[name]); } else { post.set_value(name, string(part.body.data(), part.body.size())); From 67a453eba0ede10097188d7954f33002e84542c4 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 16:26:00 +0300 Subject: [PATCH 11/26] small fix --- .idea/misc.xml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.idea/misc.xml b/.idea/misc.xml index 307dd609cd..5ff349e446 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,8 +1,5 @@ - - @@ -31,4 +28,4 @@ - \ No newline at end of file + From 3b6cc4300dcfc0ea99d7244bdf71969526395222 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 16:30:07 +0300 Subject: [PATCH 12/26] small fix --- runtime-light/server/http/init-functions.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 0c8b971a15..28cb2d1676 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -438,17 +438,12 @@ kphp::coro::task<> finalize_server() noexcept { [[fallthrough]]; } case kphp::http::response_state::completed: - const array files = superglobals.v$_FILES.to_array(); + const array files{superglobals.v$_FILES.to_array()}; for (array::const_iterator it = files.begin(); it != files.end(); ++it) { - const mixed& file = it.get_value(); - - if (!file.is_array()) { - kphp::log::error("$_FILES contains a value that is not an array"); - continue; - } - - const mixed tmp_filename = file.get_value(string("tmp_name")); - f$unlink(tmp_filename.to_string()); + const mixed& file{it.get_value()}; + const string tmp_filename{file.get_value(string("tmp_name")).to_string()}; + const std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; + std::ignore = k2::unlink(tmp_filename_view); } co_return; } From 5cce84c6b25bf533284bc85b877c90c63f1b4fa6 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 16:31:21 +0300 Subject: [PATCH 13/26] small fix --- runtime-light/server/server.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime-light/server/server.cmake b/runtime-light/server/server.cmake index ff4ef042e7..bf229f2128 100644 --- a/runtime-light/server/server.cmake +++ b/runtime-light/server/server.cmake @@ -3,7 +3,7 @@ prepend( server/ cli/cli-instance-state.cpp http/init-functions.cpp - http/multipart/details/parts-processing.cpp + http/multipart/details/parts-processing.cpp http/http-server-state.cpp job-worker/job-worker-server-state.cpp rpc/init-functions.cpp From 822ddad5d221e5878ad39dd9a5a892d51ef12fc5 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 16:47:24 +0300 Subject: [PATCH 14/26] small fix --- .../server/http/multipart/details/parts-parsing.h | 5 +++-- .../http/multipart/details/parts-processing.cpp | 7 ++++--- runtime-light/server/http/multipart/multipart.h | 12 +++++++----- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/runtime-light/server/http/multipart/details/parts-parsing.h b/runtime-light/server/http/multipart/details/parts-parsing.h index 76927f343f..55cd06ea5d 100644 --- a/runtime-light/server/http/multipart/details/parts-parsing.h +++ b/runtime-light/server/http/multipart/details/parts-parsing.h @@ -79,8 +79,9 @@ struct part_attribute { return std::nullopt; } - if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) { - value_view = value_view.substr(1, value_view.size() - 2); + if (value_view.starts_with('"') && value_view.ends_with('"')) { + value_view.remove_suffix(1); + value_view.remove_prefix(1); } return part_attribute{name_view, value_view}; } diff --git a/runtime-light/server/http/multipart/details/parts-processing.cpp b/runtime-light/server/http/multipart/details/parts-processing.cpp index 573657ee5f..367f3c6843 100644 --- a/runtime-light/server/http/multipart/details/parts-processing.cpp +++ b/runtime-light/server/http/multipart/details/parts-processing.cpp @@ -4,7 +4,9 @@ #include "runtime-light/server/http/multipart/details/parts-processing.h" +#include #include +#include #include #include "runtime-common/core/core-types/decl/optional.h" @@ -71,7 +73,6 @@ void process_upload_multipart(const kphp::http::multipart::details::part& part, error_code = UPLOAD_ERR_PARTIAL; } } else { - *file_res->close(); error_code = UPLOAD_ERR_CANT_WRITE; } @@ -95,7 +96,7 @@ void process_upload_multipart(const kphp::http::multipart::details::part& part, file[string("type")].push_back(string(part.content_type.value_or(DEFAULT_CONTENT_TYPE).data(), part.content_type.value_or(DEFAULT_CONTENT_TYPE).size())); file[string("size")].push_back(static_cast(file_size)); file[string("tmp_name")].push_back(string(tmp_name.data(), tmp_name.size())); - file[string("error")].push_back(0); + file[string("error")].push_back(UPLOAD_ERR_OK); } } else { mixed& file = files[name]; @@ -108,7 +109,7 @@ void process_upload_multipart(const kphp::http::multipart::details::part& part, file.set_value(string("type"), string(part.content_type.value_or(DEFAULT_CONTENT_TYPE).data(), part.content_type.value_or(DEFAULT_CONTENT_TYPE).size())); file.set_value(string("size"), static_cast(file_size)); file.set_value(string("tmp_name"), string(tmp_name.data(), tmp_name.size())); - file.set_value(string("error"), 0); + file.set_value(string("error"), UPLOAD_ERR_OK); } } } diff --git a/runtime-light/server/http/multipart/multipart.h b/runtime-light/server/http/multipart/multipart.h index 9f18cb6879..ee89d1c083 100644 --- a/runtime-light/server/http/multipart/multipart.h +++ b/runtime-light/server/http/multipart/multipart.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include @@ -29,12 +30,13 @@ inline std::optional extract_boundary(std::string_view content if (pos == std::string_view::npos) { return std::nullopt; } - // todo assert "body" - std::string_view res{content_type.substr(pos + MULTIPART_BOUNDARY_EQ.size())}; - if (res.size() >= 2 && res.starts_with('"') && res.ends_with('"')) { - res = res.substr(1, res.size() - 2); + + std::string_view boundary_view{content_type.substr(pos + MULTIPART_BOUNDARY_EQ.size())}; + if (boundary_view.starts_with('"') && boundary_view.ends_with('"')) { + boundary_view.remove_suffix(1); + boundary_view.remove_prefix(1); } - return res; + return boundary_view; } } // namespace kphp::http::multipart From 49301a0e1866446cbaac6487165b51feb41ef85a Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 16:55:52 +0300 Subject: [PATCH 15/26] small fix --- runtime-light/server/http/init-functions.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 28cb2d1676..198088610f 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -405,12 +405,12 @@ kphp::coro::task<> finalize_server() noexcept { } // fill headers http_response.http_response.headers.value.reserve(http_server_instance_st.headers().size()); - std::transform( - http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), std::back_inserter(http_response.http_response.headers.value), - [](const auto& header_entry) noexcept { - const auto& [name, value]{header_entry}; - return tl::httpHeaderEntry{.is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; - }); + std::transform(http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), + std::back_inserter(http_response.http_response.headers.value), [](const auto& header_entry) noexcept { + const auto& [name, value]{header_entry}; + return tl::httpHeaderEntry{ + .is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; + }); http_server_instance_st.response_state = kphp::http::response_state::headers_sent; [[fallthrough]]; } From c6e282513f2605d5ce75f826f7fcde2f4738dafd Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 6 Mar 2026 16:58:12 +0300 Subject: [PATCH 16/26] small fix --- runtime-light/stdlib/rpc/rpc-api.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/runtime-light/stdlib/rpc/rpc-api.cpp b/runtime-light/stdlib/rpc/rpc-api.cpp index 4b26403d9d..6ac41e3272 100644 --- a/runtime-light/stdlib/rpc/rpc-api.cpp +++ b/runtime-light/stdlib/rpc/rpc-api.cpp @@ -344,10 +344,11 @@ kphp::coro::task send_request(std::string_view actor, std co_return std::move(opt_response); }}; - static constexpr auto ignore_answer_awaiter_coroutine{[](kphp::component::stream stream, std::chrono::milliseconds timeout) noexcept -> kphp::coro::shared_task { - auto fetch_task{kphp::component::fetch_response(stream, [](std::span) noexcept {})}; - std::ignore = co_await kphp::coro::io_scheduler::get().schedule(std::move(fetch_task), timeout); - }}; + static constexpr auto ignore_answer_awaiter_coroutine{ + [](kphp::component::stream stream, std::chrono::milliseconds timeout) noexcept -> kphp::coro::shared_task { + auto fetch_task{kphp::component::fetch_response(stream, [](std::span) noexcept {})}; + std::ignore = co_await kphp::coro::io_scheduler::get().schedule(std::move(fetch_task), timeout); + }}; // normalize timeout using namespace std::chrono_literals; From 5d96fadbf9dd0c03ef125f25aad532fc6cd4970c Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Wed, 11 Mar 2026 12:26:18 +0300 Subject: [PATCH 17/26] apply review --- runtime-light/server/http/init-functions.cpp | 24 ++-- .../multipart/details/parts-processing.cpp | 135 +++++++++++------- .../http/multipart/details/parts-processing.h | 2 +- .../server/http/multipart/multipart.h | 2 +- 4 files changed, 96 insertions(+), 67 deletions(-) diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 198088610f..dba365ea04 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -322,19 +322,19 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector(invoke_http.body.data()), static_cast(invoke_http.body.size())}; if (!std::ranges::search(content_type, CONTENT_TYPE_APP_FORM_URLENCODED).empty()) { + string body{reinterpret_cast(invoke_http.body.data()), static_cast(invoke_http.body.size())}; f$parse_str(body, superglobals.v$_POST); http_server_instance_st.opt_raw_post_data.emplace(std::move(body)); } else if (!std::ranges::search(content_type, CONTENT_TYPE_MULTIPART_FORM_DATA).empty()) { - auto boundary_opt{kphp::http::multipart::extract_boundary(content_type)}; - if (boundary_opt.has_value()) { - kphp::http::multipart::process_multipart_content_type({body.c_str(), body.size()}, *boundary_opt, superglobals); + if (auto boundary_opt{kphp::http::multipart::extract_boundary(content_type)}; boundary_opt.has_value()) { + std::string_view body_view{reinterpret_cast(invoke_http.body.data()), static_cast(invoke_http.body.size())}; + kphp::http::multipart::process_multipart_content_type(body_view, *boundary_opt, superglobals); } } else { + string body{reinterpret_cast(invoke_http.body.data()), static_cast(invoke_http.body.size())}; http_server_instance_st.opt_raw_post_data.emplace(std::move(body)); } - server.set_value(string{CONTENT_TYPE.data(), CONTENT_TYPE.size()}, string{content_type.data(), static_cast(content_type.size())}); break; } @@ -405,12 +405,12 @@ kphp::coro::task<> finalize_server() noexcept { } // fill headers http_response.http_response.headers.value.reserve(http_server_instance_st.headers().size()); - std::transform(http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), - std::back_inserter(http_response.http_response.headers.value), [](const auto& header_entry) noexcept { - const auto& [name, value]{header_entry}; - return tl::httpHeaderEntry{ - .is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; - }); + std::transform( + http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), std::back_inserter(http_response.http_response.headers.value), + [](const auto& header_entry) noexcept { + const auto& [name, value]{header_entry}; + return tl::httpHeaderEntry{.is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; + }); http_server_instance_st.response_state = kphp::http::response_state::headers_sent; [[fallthrough]]; } @@ -441,7 +441,7 @@ kphp::coro::task<> finalize_server() noexcept { const array files{superglobals.v$_FILES.to_array()}; for (array::const_iterator it = files.begin(); it != files.end(); ++it) { const mixed& file{it.get_value()}; - const string tmp_filename{file.get_value(string("tmp_name")).to_string()}; + const string tmp_filename{file.get_value(string{"tmp_name"}).to_string()}; const std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; std::ignore = k2::unlink(tmp_filename_view); } diff --git a/runtime-light/server/http/multipart/details/parts-processing.cpp b/runtime-light/server/http/multipart/details/parts-processing.cpp index 367f3c6843..6d484f6d44 100644 --- a/runtime-light/server/http/multipart/details/parts-processing.cpp +++ b/runtime-light/server/http/multipart/details/parts-processing.cpp @@ -7,12 +7,18 @@ #include #include #include +#include +#include +#include #include +#include -#include "runtime-common/core/core-types/decl/optional.h" #include "runtime-common/core/runtime-core.h" +#include "runtime-common/core/std/containers.h" #include "runtime-common/stdlib/server/url-functions.h" +#include "runtime-light/k2-platform/k2-api.h" #include "runtime-light/server/http/multipart/details/parts-parsing.h" +#include "runtime-light/state/component-state.h" #include "runtime-light/stdlib/diagnostics/logs.h" #include "runtime-light/stdlib/file/resource.h" #include "runtime-light/stdlib/math/random-functions.h" @@ -21,9 +27,6 @@ namespace { constexpr std::string_view CONTENT_TYPE_APP_FORM_URLENCODED = "application/x-www-form-urlencoded"; -constexpr int8_t TMP_FILENAME_LENGTH = 10; -constexpr std::string_view TMP_DIR = "/tmp/"; - constexpr std::string_view DEFAULT_CONTENT_TYPE = "text/plain"; constexpr int32_t UPLOAD_ERR_OK = 0; @@ -35,6 +38,54 @@ constexpr int32_t UPLOAD_ERR_NO_FILE = 4; constexpr int32_t UPLOAD_ERR_CANT_WRITE = 7; // constexpr int32_t UPLOAD_ERR_EXTENSION = 8; // unused in kphp +std::optional> generate_temporary_name() noexcept { + static constexpr std::string_view LETTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + static constexpr auto random_letter = []() noexcept { + int64_t pos{f$mt_rand(0, LETTERS.size() - 1)}; + return LETTERS[pos]; + }; + static constexpr int64_t GENERATE_ATTEMPTS = 4; + static constexpr int64_t SYMBOLS_COUNT = 6; + + const auto& component_st{ComponentState::get()}; + auto tmp_dir_env{component_st.env.get_value(string{"TMPDIR"})}; + + std::string_view tmp_path{tmp_dir_env.is_string() ? std::string_view{tmp_dir_env.as_string().c_str(), tmp_dir_env.as_string().size()} : P_tmpdir}; + + for (int64_t attempt = 0; attempt < GENERATE_ATTEMPTS; ++attempt) { + kphp::stl::string tmp_name{tmp_path.data(), tmp_path.size()}; + tmp_name.push_back('/'); + for (auto _ : std::views::iota(0, SYMBOLS_COUNT)) { + tmp_name.push_back(random_letter()); + } + auto is_exists_res{k2::access(tmp_name, F_OK)}; + if (!is_exists_res.has_value()) { + return tmp_name; + } + } + return std::nullopt; +} + +std::expected write_temporary_file(std::string_view tmp_name, std::span content) noexcept { + auto file_res{kphp::fs::file::open(tmp_name, "w")}; + size_t file_size{}; + if (file_res.has_value()) { + const auto written_res{(*file_res).write(content)}; + if (written_res.has_value()) { + file_size = *written_res; + if (file_size < content.size()) { + return std::unexpected{UPLOAD_ERR_PARTIAL}; + } + } else { + return std::unexpected{UPLOAD_ERR_CANT_WRITE}; + } + + } else { + return std::unexpected{UPLOAD_ERR_NO_FILE}; + } + return file_size; +} + } // namespace namespace kphp::http::multipart::details { @@ -49,67 +100,45 @@ void process_post_multipart(const kphp::http::multipart::details::part& part, mi } } -void process_upload_multipart(const kphp::http::multipart::details::part& part, mixed& files) noexcept { - // TODO: replace f$random_bytes to avoid string allocation - Optional rand_str{f$random_bytes(TMP_FILENAME_LENGTH)}; +void process_file_multipart(const kphp::http::multipart::details::part& part, mixed& files) noexcept { + kphp::log::assertion(part.filename_attribute.has_value()); - if (!rand_str.has_value()) [[unlikely]] { - // kphp::log::warning("error generating random_bytes for tmp file"); + auto tmp_name_opt{generate_temporary_name()}; + if (!tmp_name_opt.has_value()) { + kphp::log::warning("cannot generate unique name for multipart temporary file"); return; } - - string tmp_name_str{TMP_DIR.data(), TMP_DIR.size()}; - tmp_name_str.append(rand_str.val()); - std::string_view tmp_name{tmp_name_str.c_str(), tmp_name_str.size()}; - - auto file_res{kphp::fs::file::open(tmp_name, "w")}; - int32_t error_code{UPLOAD_ERR_OK}; - size_t file_size{}; - if (file_res.has_value()) { - const auto written_res{(*file_res).write({reinterpret_cast(part.body.data()), part.body.size()})}; - if (written_res.has_value()) { - file_size = *written_res; - if (file_size < part.body.size()) { - error_code = UPLOAD_ERR_PARTIAL; - } - } else { - error_code = UPLOAD_ERR_CANT_WRITE; - } - - } else { - error_code = UPLOAD_ERR_NO_FILE; - } - - kphp::log::assertion(part.filename_attribute.has_value()); + auto tmp_name{*tmp_name_opt}; + auto write_res{write_temporary_file(tmp_name, {reinterpret_cast(part.body.data()), part.body.size()})}; const string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; if (part.name_attribute.ends_with("[]")) { mixed& file = files[name.substr(0, name.size() - 2)]; - if (error_code != UPLOAD_ERR_OK) { - file[string("name")].push_back(string()); - file[string("type")].push_back(string()); - file[string("size")].push_back(0); - file[string("tmp_name")].push_back(string()); - file[string("error")].push_back(error_code); + if (!write_res.has_value()) { + file[string{"name"}].push_back(string()); + file[string{"type"}].push_back(string()); + file[string{"size"}].push_back(0); + file[string{"tmp_name"}].push_back(string()); + file[string{"error"}].push_back(write_res.error()); } else { - file[string("name")].push_back(string((*part.filename_attribute).data(), (*part.filename_attribute).size())); - file[string("type")].push_back(string(part.content_type.value_or(DEFAULT_CONTENT_TYPE).data(), part.content_type.value_or(DEFAULT_CONTENT_TYPE).size())); - file[string("size")].push_back(static_cast(file_size)); - file[string("tmp_name")].push_back(string(tmp_name.data(), tmp_name.size())); - file[string("error")].push_back(UPLOAD_ERR_OK); + file[string{"name"}].push_back(string((*part.filename_attribute).data(), (*part.filename_attribute).size())); + file[string{"type"}].push_back(string(part.content_type.value_or(DEFAULT_CONTENT_TYPE).data(), part.content_type.value_or(DEFAULT_CONTENT_TYPE).size())); + file[string{"size"}].push_back(static_cast(*write_res)); + file[string{"tmp_name"}].push_back(string(tmp_name.data(), tmp_name.size())); + file[string{"error"}].push_back(UPLOAD_ERR_OK); } } else { mixed& file = files[name]; - if (error_code != UPLOAD_ERR_OK) { - file.set_value(string("size"), 0); - file.set_value(string("tmp_name"), string()); - file.set_value(string("error"), error_code); + if (!write_res.has_value()) { + file.set_value(string{"size"}, 0); + file.set_value(string{"tmp_name"}, string()); + file.set_value(string{"error"}, write_res.error()); } else { - file.set_value(string("name"), string((*part.filename_attribute).data(), (*part.filename_attribute).size())); - file.set_value(string("type"), string(part.content_type.value_or(DEFAULT_CONTENT_TYPE).data(), part.content_type.value_or(DEFAULT_CONTENT_TYPE).size())); - file.set_value(string("size"), static_cast(file_size)); - file.set_value(string("tmp_name"), string(tmp_name.data(), tmp_name.size())); - file.set_value(string("error"), UPLOAD_ERR_OK); + file.set_value(string{"name"}, string((*part.filename_attribute).data(), (*part.filename_attribute).size())); + file.set_value(string{"type"}, string(part.content_type.value_or(DEFAULT_CONTENT_TYPE).data(), part.content_type.value_or(DEFAULT_CONTENT_TYPE).size())); + file.set_value(string{"size"}, static_cast(*write_res)); + file.set_value(string{"tmp_name"}, string(tmp_name.data(), tmp_name.size())); + file.set_value(string{"error"}, UPLOAD_ERR_OK); } } } diff --git a/runtime-light/server/http/multipart/details/parts-processing.h b/runtime-light/server/http/multipart/details/parts-processing.h index f7f8768e60..d3f6598b96 100644 --- a/runtime-light/server/http/multipart/details/parts-processing.h +++ b/runtime-light/server/http/multipart/details/parts-processing.h @@ -11,6 +11,6 @@ namespace kphp::http::multipart::details { void process_post_multipart(const kphp::http::multipart::details::part& part, mixed& post) noexcept; -void process_upload_multipart(const kphp::http::multipart::details::part& part, mixed& files) noexcept; +void process_file_multipart(const kphp::http::multipart::details::part& part, mixed& files) noexcept; } // namespace kphp::http::multipart::details diff --git a/runtime-light/server/http/multipart/multipart.h b/runtime-light/server/http/multipart/multipart.h index ee89d1c083..21da1cc915 100644 --- a/runtime-light/server/http/multipart/multipart.h +++ b/runtime-light/server/http/multipart/multipart.h @@ -18,7 +18,7 @@ constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary="; inline void process_multipart_content_type(std::string_view body, std::string_view boundary, PhpScriptBuiltInSuperGlobals& superglobals) noexcept { for (const auto& part : details::parse_multipart_parts(body, boundary)) { if (part.filename_attribute.has_value()) { - details::process_upload_multipart(part, superglobals.v$_FILES); + details::process_file_multipart(part, superglobals.v$_FILES); } else { details::process_post_multipart(part, superglobals.v$_POST); } From ca69ddeed262704824816ab08b90dcbf487d0db8 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Wed, 11 Mar 2026 15:43:54 +0300 Subject: [PATCH 18/26] add test assert --- tests/python/tests/http_server/test_multipart.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/python/tests/http_server/test_multipart.py b/tests/python/tests/http_server/test_multipart.py index 5413618f93..ac6d1fa011 100644 --- a/tests/python/tests/http_server/test_multipart.py +++ b/tests/python/tests/http_server/test_multipart.py @@ -1,3 +1,4 @@ +import os from urllib.parse import urlencode from python.lib.testcase import WebServerAutoTestCase @@ -37,6 +38,8 @@ def test_multipart_name_attributes(self): self.assertTrue(response.content.find(b"role : admin") != -1) def test_multipart_filename_attribute(self): + + tmp_files = os.listdir("/tmp/") boundary = "------------------------d74496d66958873e" file_bytes = b"Hello from test.txt\nSecond line\n" @@ -67,6 +70,10 @@ def test_multipart_filename_attribute(self): self.assertTrue(response.content.find(b"filename : test.txt") != -1) self.assertTrue(response.content.find(b"Hello from test.txt") != -1) + tmp_files_after_script = os.listdir("/tmp/") + # check that script delete tmp files at the end + self.assertEqual(sorted(tmp_files), sorted(tmp_files_after_script)) + def test_multipart_filename_array_attribute(self): boundary = "------------------------d74496d66958873e" From 97c99313c0cffc0d67e43171d45d7527f2bd7d87 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Wed, 11 Mar 2026 15:47:44 +0300 Subject: [PATCH 19/26] fix format --- runtime-light/server/http/init-functions.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index dba365ea04..8b5985e634 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -405,12 +405,12 @@ kphp::coro::task<> finalize_server() noexcept { } // fill headers http_response.http_response.headers.value.reserve(http_server_instance_st.headers().size()); - std::transform( - http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), std::back_inserter(http_response.http_response.headers.value), - [](const auto& header_entry) noexcept { - const auto& [name, value]{header_entry}; - return tl::httpHeaderEntry{.is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; - }); + std::transform(http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), + std::back_inserter(http_response.http_response.headers.value), [](const auto& header_entry) noexcept { + const auto& [name, value]{header_entry}; + return tl::httpHeaderEntry{ + .is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; + }); http_server_instance_st.response_state = kphp::http::response_state::headers_sent; [[fallthrough]]; } From 920e25ea2807649efdf101c80a19a6a56e891307 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Wed, 11 Mar 2026 16:06:10 +0300 Subject: [PATCH 20/26] fix format --- runtime-light/server/http/init-functions.cpp | 2 +- .../server/http/multipart/details/parts-processing.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 8b5985e634..96071221d3 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -439,7 +439,7 @@ kphp::coro::task<> finalize_server() noexcept { } case kphp::http::response_state::completed: const array files{superglobals.v$_FILES.to_array()}; - for (array::const_iterator it = files.begin(); it != files.end(); ++it) { + for (array::const_iterator it{files.begin()}; it != files.end(); ++it) { const mixed& file{it.get_value()}; const string tmp_filename{file.get_value(string{"tmp_name"}).to_string()}; const std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; diff --git a/runtime-light/server/http/multipart/details/parts-processing.cpp b/runtime-light/server/http/multipart/details/parts-processing.cpp index 6d484f6d44..d073602681 100644 --- a/runtime-light/server/http/multipart/details/parts-processing.cpp +++ b/runtime-light/server/http/multipart/details/parts-processing.cpp @@ -52,7 +52,7 @@ std::optional> generate_tempor std::string_view tmp_path{tmp_dir_env.is_string() ? std::string_view{tmp_dir_env.as_string().c_str(), tmp_dir_env.as_string().size()} : P_tmpdir}; - for (int64_t attempt = 0; attempt < GENERATE_ATTEMPTS; ++attempt) { + for (int64_t attempt{}; attempt < GENERATE_ATTEMPTS; ++attempt) { kphp::stl::string tmp_name{tmp_path.data(), tmp_path.size()}; tmp_name.push_back('/'); for (auto _ : std::views::iota(0, SYMBOLS_COUNT)) { From 279724e3c335cd2a6d9827d2af04102d0e914b32 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Wed, 11 Mar 2026 17:07:57 +0300 Subject: [PATCH 21/26] apply review --- runtime-light/server/http/init-functions.cpp | 33 ++++++--- .../http/multipart/details/parts-parsing.h | 2 - .../multipart/details/parts-processing.cpp | 68 +++++++++---------- .../server/http/multipart/multipart.h | 6 +- .../tests/http_server/test_multipart.py | 6 ++ 5 files changed, 66 insertions(+), 49 deletions(-) diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 96071221d3..10ed781017 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -39,6 +39,8 @@ #include "runtime-light/tl/tl-functions.h" #include "runtime-light/tl/tl-types.h" +#include "runtime-light/stdlib/output/print-functions.h" + namespace { constexpr std::string_view EMPTY = ""; @@ -405,12 +407,12 @@ kphp::coro::task<> finalize_server() noexcept { } // fill headers http_response.http_response.headers.value.reserve(http_server_instance_st.headers().size()); - std::transform(http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), - std::back_inserter(http_response.http_response.headers.value), [](const auto& header_entry) noexcept { - const auto& [name, value]{header_entry}; - return tl::httpHeaderEntry{ - .is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; - }); + std::transform( + http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), std::back_inserter(http_response.http_response.headers.value), + [](const auto& header_entry) noexcept { + const auto& [name, value]{header_entry}; + return tl::httpHeaderEntry{.is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; + }); http_server_instance_st.response_state = kphp::http::response_state::headers_sent; [[fallthrough]]; } @@ -439,11 +441,20 @@ kphp::coro::task<> finalize_server() noexcept { } case kphp::http::response_state::completed: const array files{superglobals.v$_FILES.to_array()}; - for (array::const_iterator it{files.begin()}; it != files.end(); ++it) { - const mixed& file{it.get_value()}; - const string tmp_filename{file.get_value(string{"tmp_name"}).to_string()}; - const std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; - std::ignore = k2::unlink(tmp_filename_view); + for (const auto& files_it : files) { + const mixed& file{files_it.get_value()}; + const mixed& tmp_filenames{file.get_value(string{"tmp_name"})}; + if (tmp_filenames.is_array()) { + for (const auto& tmp_filename_it : tmp_filenames) { + const string tmp_filename{tmp_filename_it.get_value().as_string()}; + const std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; + std::ignore = k2::unlink(tmp_filename_view); + } + } else { + const string tmp_filename{tmp_filenames.to_string()}; + const std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; + std::ignore = k2::unlink(tmp_filename_view); + } } co_return; } diff --git a/runtime-light/server/http/multipart/details/parts-parsing.h b/runtime-light/server/http/multipart/details/parts-parsing.h index 55cd06ea5d..9ab4b201bf 100644 --- a/runtime-light/server/http/multipart/details/parts-parsing.h +++ b/runtime-light/server/http/multipart/details/parts-parsing.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -15,7 +14,6 @@ #include #include "common/algorithms/string-algorithms.h" -#include "runtime-light/server/http/http-server-state.h" namespace kphp::http::multipart::details { diff --git a/runtime-light/server/http/multipart/details/parts-processing.cpp b/runtime-light/server/http/multipart/details/parts-processing.cpp index d073602681..2b5d17d85d 100644 --- a/runtime-light/server/http/multipart/details/parts-processing.cpp +++ b/runtime-light/server/http/multipart/details/parts-processing.cpp @@ -22,6 +22,7 @@ #include "runtime-light/stdlib/diagnostics/logs.h" #include "runtime-light/stdlib/file/resource.h" #include "runtime-light/stdlib/math/random-functions.h" +#include "runtime-light/stdlib/output/print-functions.h" namespace { @@ -30,12 +31,14 @@ constexpr std::string_view CONTENT_TYPE_APP_FORM_URLENCODED = "application/x-www constexpr std::string_view DEFAULT_CONTENT_TYPE = "text/plain"; constexpr int32_t UPLOAD_ERR_OK = 0; -// constexpr int32_t UPLOAD_ERR_INI_SIZE = 1; // unused in kphp -// constexpr int32_t UPLOAD_ERR_FORM_SIZE = 2; // todo support header max-file-size constexpr int32_t UPLOAD_ERR_PARTIAL = 3; constexpr int32_t UPLOAD_ERR_NO_FILE = 4; -// constexpr int32_t UPLOAD_ERR_NO_TMP_DIR = 6; // todo support check tmp dir constexpr int32_t UPLOAD_ERR_CANT_WRITE = 7; + +// Not implemented : +// constexpr int32_t UPLOAD_ERR_INI_SIZE = 1; // unused in kphp +// constexpr int32_t UPLOAD_ERR_FORM_SIZE = 2; // todo support header max-file-size +// constexpr int32_t UPLOAD_ERR_NO_TMP_DIR = 6; // todo support check tmp dir // constexpr int32_t UPLOAD_ERR_EXTENSION = 8; // unused in kphp std::optional> generate_temporary_name() noexcept { @@ -68,22 +71,20 @@ std::optional> generate_tempor std::expected write_temporary_file(std::string_view tmp_name, std::span content) noexcept { auto file_res{kphp::fs::file::open(tmp_name, "w")}; - size_t file_size{}; if (file_res.has_value()) { const auto written_res{(*file_res).write(content)}; if (written_res.has_value()) { - file_size = *written_res; + size_t file_size{*written_res}; if (file_size < content.size()) { return std::unexpected{UPLOAD_ERR_PARTIAL}; } + return file_size; } else { return std::unexpected{UPLOAD_ERR_CANT_WRITE}; } - } else { return std::unexpected{UPLOAD_ERR_NO_FILE}; } - return file_size; } } // namespace @@ -96,7 +97,7 @@ void process_post_multipart(const kphp::http::multipart::details::part& part, mi if (part.content_type.has_value() && !std::ranges::search(*part.content_type, CONTENT_TYPE_APP_FORM_URLENCODED).empty()) { f$parse_str(body, post[name]); } else { - post.set_value(name, string(part.body.data(), part.body.size())); + post.set_value(name, body); } } @@ -111,35 +112,34 @@ void process_file_multipart(const kphp::http::multipart::details::part& part, mi auto tmp_name{*tmp_name_opt}; auto write_res{write_temporary_file(tmp_name, {reinterpret_cast(part.body.data()), part.body.size()})}; - const string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; + mixed file{}; + if (!write_res.has_value()) { + file.set_value(string{"size"}, 0); + file.set_value(string{"tmp_name"}, string{}); + file.set_value(string{"error"}, write_res.error()); + } else { + const auto content_type{part.content_type.value_or(DEFAULT_CONTENT_TYPE)}; + file.set_value(string{"name"}, string{(*part.filename_attribute).data(), static_cast((*part.filename_attribute).size())}); + file.set_value(string{"type"}, string{content_type.data(), static_cast(content_type.size())}); + file.set_value(string{"size"}, static_cast(*write_res)); + file.set_value(string{"tmp_name"}, string{tmp_name.data(), static_cast(tmp_name.size())}); + file.set_value(string{"error"}, UPLOAD_ERR_OK); + } + if (part.name_attribute.ends_with("[]")) { - mixed& file = files[name.substr(0, name.size() - 2)]; - if (!write_res.has_value()) { - file[string{"name"}].push_back(string()); - file[string{"type"}].push_back(string()); - file[string{"size"}].push_back(0); - file[string{"tmp_name"}].push_back(string()); - file[string{"error"}].push_back(write_res.error()); - } else { - file[string{"name"}].push_back(string((*part.filename_attribute).data(), (*part.filename_attribute).size())); - file[string{"type"}].push_back(string(part.content_type.value_or(DEFAULT_CONTENT_TYPE).data(), part.content_type.value_or(DEFAULT_CONTENT_TYPE).size())); - file[string{"size"}].push_back(static_cast(*write_res)); - file[string{"tmp_name"}].push_back(string(tmp_name.data(), tmp_name.size())); - file[string{"error"}].push_back(UPLOAD_ERR_OK); + const string name{part.name_attribute.data(), static_cast(part.name_attribute.size() - 2)}; + mixed file_array{files.get_value(name)}; + + for (auto& attribute_it : file) { + string attribute{attribute_it.get_key().to_string()}; + mixed file_array_value{file_array.get_value(attribute)}; + file_array_value.push_back(attribute_it.get_value().to_string()); + file_array.set_value(attribute, file_array_value); } + files.set_value(name, file_array); } else { - mixed& file = files[name]; - if (!write_res.has_value()) { - file.set_value(string{"size"}, 0); - file.set_value(string{"tmp_name"}, string()); - file.set_value(string{"error"}, write_res.error()); - } else { - file.set_value(string{"name"}, string((*part.filename_attribute).data(), (*part.filename_attribute).size())); - file.set_value(string{"type"}, string(part.content_type.value_or(DEFAULT_CONTENT_TYPE).data(), part.content_type.value_or(DEFAULT_CONTENT_TYPE).size())); - file.set_value(string{"size"}, static_cast(*write_res)); - file.set_value(string{"tmp_name"}, string(tmp_name.data(), tmp_name.size())); - file.set_value(string{"error"}, UPLOAD_ERR_OK); - } + const string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; + files.set_value(name, file); } } } // namespace kphp::http::multipart::details diff --git a/runtime-light/server/http/multipart/multipart.h b/runtime-light/server/http/multipart/multipart.h index 21da1cc915..cef655c125 100644 --- a/runtime-light/server/http/multipart/multipart.h +++ b/runtime-light/server/http/multipart/multipart.h @@ -13,7 +13,9 @@ namespace kphp::http::multipart { +namespace details { constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary="; +} // namespace details inline void process_multipart_content_type(std::string_view body, std::string_view boundary, PhpScriptBuiltInSuperGlobals& superglobals) noexcept { for (const auto& part : details::parse_multipart_parts(body, boundary)) { @@ -26,12 +28,12 @@ inline void process_multipart_content_type(std::string_view body, std::string_vi } inline std::optional extract_boundary(std::string_view content_type) noexcept { - const size_t pos{content_type.find(MULTIPART_BOUNDARY_EQ)}; + const size_t pos{content_type.find(details::MULTIPART_BOUNDARY_EQ)}; if (pos == std::string_view::npos) { return std::nullopt; } - std::string_view boundary_view{content_type.substr(pos + MULTIPART_BOUNDARY_EQ.size())}; + std::string_view boundary_view{content_type.substr(pos + details::MULTIPART_BOUNDARY_EQ.size())}; if (boundary_view.starts_with('"') && boundary_view.ends_with('"')) { boundary_view.remove_suffix(1); boundary_view.remove_prefix(1); diff --git a/tests/python/tests/http_server/test_multipart.py b/tests/python/tests/http_server/test_multipart.py index ac6d1fa011..1436ec8df8 100644 --- a/tests/python/tests/http_server/test_multipart.py +++ b/tests/python/tests/http_server/test_multipart.py @@ -75,6 +75,8 @@ def test_multipart_filename_attribute(self): self.assertEqual(sorted(tmp_files), sorted(tmp_files_after_script)) def test_multipart_filename_array_attribute(self): + tmp_files = os.listdir("/tmp/") + boundary = "------------------------d74496d66958873e" # Two "files" (their raw bytes) @@ -117,6 +119,10 @@ def test_multipart_filename_array_attribute(self): self.assertTrue(response.content.find(b"Hello from a.txt") != -1) self.assertTrue(response.content.find(b"Hello from b.txt") != -1) + tmp_files_after_script = os.listdir("/tmp/") + # check that script delete tmp files at the end + self.assertEqual(sorted(tmp_files), sorted(tmp_files_after_script)) + def test_multipart_name_urlencoded_attribute(self): boundary = "------------------------d74496d66958873e" From 3b932474b45d3e2b784797d0ca62764a0aef3c2d Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Thu, 12 Mar 2026 12:30:27 +0300 Subject: [PATCH 22/26] fix format --- runtime-light/server/http/init-functions.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 10ed781017..37c012cbbb 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -407,12 +407,12 @@ kphp::coro::task<> finalize_server() noexcept { } // fill headers http_response.http_response.headers.value.reserve(http_server_instance_st.headers().size()); - std::transform( - http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), std::back_inserter(http_response.http_response.headers.value), - [](const auto& header_entry) noexcept { - const auto& [name, value]{header_entry}; - return tl::httpHeaderEntry{.is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; - }); + std::transform(http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), + std::back_inserter(http_response.http_response.headers.value), [](const auto& header_entry) noexcept { + const auto& [name, value]{header_entry}; + return tl::httpHeaderEntry{ + .is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; + }); http_server_instance_st.response_state = kphp::http::response_state::headers_sent; [[fallthrough]]; } From 3391f1fb6d5e8a84b3a2b42b589fa738ce6f1399 Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Thu, 12 Mar 2026 12:32:31 +0300 Subject: [PATCH 23/26] remove extra const --- runtime-light/server/http/init-functions.cpp | 8 ++++---- .../server/http/multipart/details/parts-processing.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 37c012cbbb..9f12f9a801 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -446,13 +446,13 @@ kphp::coro::task<> finalize_server() noexcept { const mixed& tmp_filenames{file.get_value(string{"tmp_name"})}; if (tmp_filenames.is_array()) { for (const auto& tmp_filename_it : tmp_filenames) { - const string tmp_filename{tmp_filename_it.get_value().as_string()}; - const std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; + string tmp_filename{tmp_filename_it.get_value().as_string()}; + std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; std::ignore = k2::unlink(tmp_filename_view); } } else { - const string tmp_filename{tmp_filenames.to_string()}; - const std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; + string tmp_filename{tmp_filenames.to_string()}; + std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; std::ignore = k2::unlink(tmp_filename_view); } } diff --git a/runtime-light/server/http/multipart/details/parts-processing.cpp b/runtime-light/server/http/multipart/details/parts-processing.cpp index 2b5d17d85d..80210e0e8d 100644 --- a/runtime-light/server/http/multipart/details/parts-processing.cpp +++ b/runtime-light/server/http/multipart/details/parts-processing.cpp @@ -72,7 +72,7 @@ std::optional> generate_tempor std::expected write_temporary_file(std::string_view tmp_name, std::span content) noexcept { auto file_res{kphp::fs::file::open(tmp_name, "w")}; if (file_res.has_value()) { - const auto written_res{(*file_res).write(content)}; + auto written_res{(*file_res).write(content)}; if (written_res.has_value()) { size_t file_size{*written_res}; if (file_size < content.size()) { @@ -127,7 +127,7 @@ void process_file_multipart(const kphp::http::multipart::details::part& part, mi } if (part.name_attribute.ends_with("[]")) { - const string name{part.name_attribute.data(), static_cast(part.name_attribute.size() - 2)}; + string name{part.name_attribute.data(), static_cast(part.name_attribute.size() - 2)}; mixed file_array{files.get_value(name)}; for (auto& attribute_it : file) { @@ -138,7 +138,7 @@ void process_file_multipart(const kphp::http::multipart::details::part& part, mi } files.set_value(name, file_array); } else { - const string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; + string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; files.set_value(name, file); } } From 03d18094e3a0493db1badc7f324f509b949e107a Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Thu, 12 Mar 2026 12:38:24 +0300 Subject: [PATCH 24/26] add include --- runtime-light/server/http/multipart/details/parts-parsing.h | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime-light/server/http/multipart/details/parts-parsing.h b/runtime-light/server/http/multipart/details/parts-parsing.h index 9ab4b201bf..7d73d48cb0 100644 --- a/runtime-light/server/http/multipart/details/parts-parsing.h +++ b/runtime-light/server/http/multipart/details/parts-parsing.h @@ -14,6 +14,7 @@ #include #include "common/algorithms/string-algorithms.h" +#include "runtime-light/server/http/http-server-state.h" namespace kphp::http::multipart::details { From a92ac9ec5f90f01bdd07eb0c1d4b281a92c8b35d Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 13 Mar 2026 15:10:14 +0300 Subject: [PATCH 25/26] apply review --- runtime-light/server/http/http-server-state.h | 2 + runtime-light/server/http/init-functions.cpp | 40 ++++-------- .../http/multipart/details/parts-parsing.h | 6 +- .../multipart/details/parts-processing.cpp | 52 ++++++++-------- .../http/multipart/details/parts-processing.h | 4 +- .../server/http/multipart/multipart.h | 43 ++++++++----- tests/python/tests/http_server/php/index.php | 6 ++ .../tests/http_server/test_multipart.py | 61 +++++++++++++++++++ 8 files changed, 145 insertions(+), 69 deletions(-) diff --git a/runtime-light/server/http/http-server-state.h b/runtime-light/server/http/http-server-state.h index 39d9e4d556..05bf927c62 100644 --- a/runtime-light/server/http/http-server-state.h +++ b/runtime-light/server/http/http-server-state.h @@ -70,6 +70,8 @@ struct HttpServerInstanceState final : private vk::not_copyable { // The headers_registered_callback function should only be invoked once std::optional> headers_registered_callback; + kphp::stl::unordered_set, kphp::memory::script_allocator> multipart_temporary_files; + private: kphp::stl::multimap, kphp::stl::string, kphp::memory::script_allocator> headers_; diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index 9f12f9a801..dc28f7e3a6 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -30,7 +30,6 @@ #include "runtime-light/state/instance-state.h" #include "runtime-light/stdlib/component/component-api.h" #include "runtime-light/stdlib/diagnostics/logs.h" -#include "runtime-light/stdlib/file/file-system-functions.h" #include "runtime-light/stdlib/output/output-state.h" #include "runtime-light/stdlib/server/http-functions.h" #include "runtime-light/stdlib/zlib/zlib-functions.h" @@ -39,8 +38,6 @@ #include "runtime-light/tl/tl-functions.h" #include "runtime-light/tl/tl-types.h" -#include "runtime-light/stdlib/output/print-functions.h" - namespace { constexpr std::string_view EMPTY = ""; @@ -329,9 +326,10 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector(invoke_http.body.data()), static_cast(invoke_http.body.size())}; - kphp::http::multipart::process_multipart_content_type(body_view, *boundary_opt, superglobals); + std::string_view body_view{reinterpret_cast(invoke_http.body.data()), static_cast(invoke_http.body.size())}; + auto process_multipart_res{kphp::http::multipart::process_multipart_content_type(content_type, body_view, superglobals)}; + if (!process_multipart_res.has_value()) { + kphp::log::warning("{}", process_multipart_res.error()); } } else { string body{reinterpret_cast(invoke_http.body.data()), static_cast(invoke_http.body.size())}; @@ -385,7 +383,6 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector finalize_server() noexcept { auto& http_server_instance_st{HttpServerInstanceState::get()}; - auto& superglobals{InstanceState::get().php_script_mutable_globals_singleton.get_superglobals()}; string response_body{}; tl::HttpResponse http_response{}; @@ -407,12 +404,12 @@ kphp::coro::task<> finalize_server() noexcept { } // fill headers http_response.http_response.headers.value.reserve(http_server_instance_st.headers().size()); - std::transform(http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), - std::back_inserter(http_response.http_response.headers.value), [](const auto& header_entry) noexcept { - const auto& [name, value]{header_entry}; - return tl::httpHeaderEntry{ - .is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; - }); + std::transform( + http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), std::back_inserter(http_response.http_response.headers.value), + [](const auto& header_entry) noexcept { + const auto& [name, value]{header_entry}; + return tl::httpHeaderEntry{.is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; + }); http_server_instance_st.response_state = kphp::http::response_state::headers_sent; [[fallthrough]]; } @@ -440,21 +437,8 @@ kphp::coro::task<> finalize_server() noexcept { [[fallthrough]]; } case kphp::http::response_state::completed: - const array files{superglobals.v$_FILES.to_array()}; - for (const auto& files_it : files) { - const mixed& file{files_it.get_value()}; - const mixed& tmp_filenames{file.get_value(string{"tmp_name"})}; - if (tmp_filenames.is_array()) { - for (const auto& tmp_filename_it : tmp_filenames) { - string tmp_filename{tmp_filename_it.get_value().as_string()}; - std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; - std::ignore = k2::unlink(tmp_filename_view); - } - } else { - string tmp_filename{tmp_filenames.to_string()}; - std::string_view tmp_filename_view{tmp_filename.c_str(), tmp_filename.size()}; - std::ignore = k2::unlink(tmp_filename_view); - } + for (const auto& temporary_file : http_server_instance_st.multipart_temporary_files) { + std::ignore = k2::unlink(temporary_file); } co_return; } diff --git a/runtime-light/server/http/multipart/details/parts-parsing.h b/runtime-light/server/http/multipart/details/parts-parsing.h index 7d73d48cb0..abcfa6f440 100644 --- a/runtime-light/server/http/multipart/details/parts-parsing.h +++ b/runtime-light/server/http/multipart/details/parts-parsing.h @@ -43,10 +43,12 @@ struct part_header { static std::optional parse(std::string_view header) noexcept { auto [name_view, value_view]{vk::split_string_view(header, ':')}; + name_view = vk::trim(name_view); + value_view = vk::trim(value_view); if (name_view.empty() || value_view.empty()) { return std::nullopt; } - return part_header{name_view, value_view.substr(1)}; + return part_header{name_view, value_view}; } bool name_is(std::string_view header_name) const noexcept { @@ -74,6 +76,8 @@ struct part_attribute { static std::optional parse(std::string_view attribute) noexcept { auto [name_view, value_view]{vk::split_string_view(vk::trim(attribute), '=')}; + name_view = vk::trim(name_view); + value_view = vk::trim(value_view); if (name_view.empty() || value_view.empty()) { return std::nullopt; } diff --git a/runtime-light/server/http/multipart/details/parts-processing.cpp b/runtime-light/server/http/multipart/details/parts-processing.cpp index 80210e0e8d..be7e0a3318 100644 --- a/runtime-light/server/http/multipart/details/parts-processing.cpp +++ b/runtime-light/server/http/multipart/details/parts-processing.cpp @@ -17,12 +17,12 @@ #include "runtime-common/core/std/containers.h" #include "runtime-common/stdlib/server/url-functions.h" #include "runtime-light/k2-platform/k2-api.h" +#include "runtime-light/server/http/http-server-state.h" #include "runtime-light/server/http/multipart/details/parts-parsing.h" #include "runtime-light/state/component-state.h" #include "runtime-light/stdlib/diagnostics/logs.h" #include "runtime-light/stdlib/file/resource.h" #include "runtime-light/stdlib/math/random-functions.h" -#include "runtime-light/stdlib/output/print-functions.h" namespace { @@ -42,14 +42,15 @@ constexpr int32_t UPLOAD_ERR_CANT_WRITE = 7; // constexpr int32_t UPLOAD_ERR_EXTENSION = 8; // unused in kphp std::optional> generate_temporary_name() noexcept { - static constexpr std::string_view LETTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - static constexpr auto random_letter = []() noexcept { + static constexpr std::string_view LETTERS{"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"}; + static constexpr auto random_letter{[]() noexcept { int64_t pos{f$mt_rand(0, LETTERS.size() - 1)}; return LETTERS[pos]; - }; + }}; static constexpr int64_t GENERATE_ATTEMPTS = 4; static constexpr int64_t SYMBOLS_COUNT = 6; + // todo rework with k2::tempnam or mkstemp const auto& component_st{ComponentState::get()}; auto tmp_dir_env{component_st.env.get_value(string{"TMPDIR"})}; @@ -71,48 +72,51 @@ std::optional> generate_tempor std::expected write_temporary_file(std::string_view tmp_name, std::span content) noexcept { auto file_res{kphp::fs::file::open(tmp_name, "w")}; - if (file_res.has_value()) { - auto written_res{(*file_res).write(content)}; - if (written_res.has_value()) { - size_t file_size{*written_res}; - if (file_size < content.size()) { - return std::unexpected{UPLOAD_ERR_PARTIAL}; - } - return file_size; - } else { - return std::unexpected{UPLOAD_ERR_CANT_WRITE}; - } - } else { + if (!file_res.has_value()) { return std::unexpected{UPLOAD_ERR_NO_FILE}; } + + auto written_res{(*file_res).write(content)}; + if (!written_res.has_value()) { + return std::unexpected{UPLOAD_ERR_CANT_WRITE}; + } + + size_t file_size{*written_res}; + if (file_size < content.size()) { + return std::unexpected{UPLOAD_ERR_PARTIAL}; + } + return file_size; } } // namespace namespace kphp::http::multipart::details { -void process_post_multipart(const kphp::http::multipart::details::part& part, mixed& post) noexcept { +void process_post_multipart(const kphp::http::multipart::details::part& part, array& post) noexcept { const string name{part.name_attribute.data(), static_cast(part.name_attribute.size())}; const string body{part.body.data(), static_cast(part.body.size())}; if (part.content_type.has_value() && !std::ranges::search(*part.content_type, CONTENT_TYPE_APP_FORM_URLENCODED).empty()) { - f$parse_str(body, post[name]); + auto post_value{post.get_value(name)}; + f$parse_str(body, post_value); + post.set_value(name, std::move(post_value)); } else { post.set_value(name, body); } } -void process_file_multipart(const kphp::http::multipart::details::part& part, mixed& files) noexcept { +void process_file_multipart(const kphp::http::multipart::details::part& part, array& files) noexcept { kphp::log::assertion(part.filename_attribute.has_value()); auto tmp_name_opt{generate_temporary_name()}; - if (!tmp_name_opt.has_value()) { - kphp::log::warning("cannot generate unique name for multipart temporary file"); - return; - } + kphp::log::assertion(tmp_name_opt.has_value()); auto tmp_name{*tmp_name_opt}; auto write_res{write_temporary_file(tmp_name, {reinterpret_cast(part.body.data()), part.body.size()})}; - mixed file{}; + if (write_res.has_value() || write_res.error() != UPLOAD_ERR_NO_FILE) { + HttpServerInstanceState::get().multipart_temporary_files.insert(*tmp_name_opt); + } + + array file{}; if (!write_res.has_value()) { file.set_value(string{"size"}, 0); file.set_value(string{"tmp_name"}, string{}); diff --git a/runtime-light/server/http/multipart/details/parts-processing.h b/runtime-light/server/http/multipart/details/parts-processing.h index d3f6598b96..fa2a0ceef1 100644 --- a/runtime-light/server/http/multipart/details/parts-processing.h +++ b/runtime-light/server/http/multipart/details/parts-processing.h @@ -9,8 +9,8 @@ namespace kphp::http::multipart::details { -void process_post_multipart(const kphp::http::multipart::details::part& part, mixed& post) noexcept; +void process_post_multipart(const kphp::http::multipart::details::part& part, array& post) noexcept; -void process_file_multipart(const kphp::http::multipart::details::part& part, mixed& files) noexcept; +void process_file_multipart(const kphp::http::multipart::details::part& part, array& files) noexcept; } // namespace kphp::http::multipart::details diff --git a/runtime-light/server/http/multipart/multipart.h b/runtime-light/server/http/multipart/multipart.h index cef655c125..bd9bb92234 100644 --- a/runtime-light/server/http/multipart/multipart.h +++ b/runtime-light/server/http/multipart/multipart.h @@ -5,9 +5,11 @@ #pragma once #include +#include #include #include +#include "runtime-light/core/globals/php-script-globals.h" #include "runtime-light/server/http/multipart/details/parts-parsing.h" #include "runtime-light/server/http/multipart/details/parts-processing.h" @@ -15,25 +17,20 @@ namespace kphp::http::multipart { namespace details { constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary="; -} // namespace details - -inline void process_multipart_content_type(std::string_view body, std::string_view boundary, PhpScriptBuiltInSuperGlobals& superglobals) noexcept { - for (const auto& part : details::parse_multipart_parts(body, boundary)) { - if (part.filename_attribute.has_value()) { - details::process_file_multipart(part, superglobals.v$_FILES); - } else { - details::process_post_multipart(part, superglobals.v$_POST); - } - } -} inline std::optional extract_boundary(std::string_view content_type) noexcept { - const size_t pos{content_type.find(details::MULTIPART_BOUNDARY_EQ)}; - if (pos == std::string_view::npos) { + const size_t boundary_start{content_type.find(details::MULTIPART_BOUNDARY_EQ)}; + if (boundary_start == std::string_view::npos) { return std::nullopt; } - std::string_view boundary_view{content_type.substr(pos + details::MULTIPART_BOUNDARY_EQ.size())}; + size_t boundary_end{content_type.find(';', boundary_start)}; + if (boundary_end == std::string_view::npos) { + boundary_end = content_type.size(); + } + + std::string_view boundary_view{ + content_type.substr(boundary_start + details::MULTIPART_BOUNDARY_EQ.size(), boundary_end - boundary_start - details::MULTIPART_BOUNDARY_EQ.size())}; if (boundary_view.starts_with('"') && boundary_view.ends_with('"')) { boundary_view.remove_suffix(1); boundary_view.remove_prefix(1); @@ -41,4 +38,22 @@ inline std::optional extract_boundary(std::string_view content return boundary_view; } +} // namespace details + +inline std::expected process_multipart_content_type(std::string_view content_type, std::string_view body, + PhpScriptBuiltInSuperGlobals& superglobals) noexcept { + auto boundary_opt{details::extract_boundary(content_type)}; + if (!boundary_opt.has_value()) { + return std::unexpected{"cannot extract boundary in multipart content type"}; + } + for (const auto& part : details::parse_multipart_parts(body, *boundary_opt)) { + if (part.filename_attribute.has_value()) { + details::process_file_multipart(part, superglobals.v$_FILES.as_array()); + } else { + details::process_post_multipart(part, superglobals.v$_POST.as_array()); + } + } + return {}; +} + } // namespace kphp::http::multipart diff --git a/tests/python/tests/http_server/php/index.php b/tests/python/tests/http_server/php/index.php index 67ec91d289..d0ea3e60da 100644 --- a/tests/python/tests/http_server/php/index.php +++ b/tests/python/tests/http_server/php/index.php @@ -263,6 +263,12 @@ public function work(string $output) { echo $_POST["form"]['name'] . "\n"; echo $_POST["form"]['note'] . "\n"; break; + case "non_terminating_boundary": + echo "name : " . $_POST["name"] . "\n"; + break; + case "superglobal_modify": + $_FILES = ["file" => ["tmp_name" => "not_exists.txt"]]; + break; default: echo "ERROR"; return; diff --git a/tests/python/tests/http_server/test_multipart.py b/tests/python/tests/http_server/test_multipart.py index 1436ec8df8..9d09383961 100644 --- a/tests/python/tests/http_server/test_multipart.py +++ b/tests/python/tests/http_server/test_multipart.py @@ -37,6 +37,32 @@ def test_multipart_name_attributes(self): self.assertTrue(response.content.find(b"name : Ivan") != -1) self.assertTrue(response.content.find(b"role : admin") != -1) + def test_multipart_non_terminating_boundary(self): + boundary = "------------------------d74496d66958873e" + + data = (f"--{boundary}\r\n" + 'Content-Disposition: form-data; name="name"\r\n' + "\r\n" + "Ivan\r\n" + f"--{boundary}\r\n" + ).encode("utf-8") + + headers = { + "Accept": "*/*", + "Content-Type": f"multipart/form-data; boundary={boundary}; charset=UTF-8", + "Content-Length": str(len(data)), # keep if http_request doesn't auto-set it + } + + response = self.web_server.http_request( + uri="/test_multipart?type=non_terminating_boundary", + method="POST", + headers=headers, + data=data, # body goes here + ) + + self.assertEqual(200, response.status_code) + self.assertTrue(response.content.find(b"name : Ivan") != -1) + def test_multipart_filename_attribute(self): tmp_files = os.listdir("/tmp/") @@ -123,6 +149,41 @@ def test_multipart_filename_array_attribute(self): # check that script delete tmp files at the end self.assertEqual(sorted(tmp_files), sorted(tmp_files_after_script)) + def test_multipart_superglobal_modify(self): + + tmp_files = os.listdir("/tmp/") + boundary = "------------------------d74496d66958873e" + + file_bytes = b"Hello from test.txt\nSecond line\n" + + data = (f"--{boundary}\r\n" + 'Content-Disposition: form-data; name="file"; filename="test.txt"\r\n' + "Content-Type: text/plain\r\n" + "\r\n" + ).encode("utf-8") + file_bytes + ( + "\r\n" + f"--{boundary}--\r\n" + ).encode("utf-8") + + headers = { + "Accept": "*/*", + "Content-Type": f"multipart/form-data; boundary={boundary}", + "Content-Length": str(len(data)), + } + + response = self.web_server.http_request( + uri="/test_multipart?type=superglobal_modify", + method="POST", + headers=headers, + data=data, + ) + + self.assertEqual(200, response.status_code) + + tmp_files_after_script = os.listdir("/tmp/") + # check that script delete tmp files at the end + self.assertEqual(sorted(tmp_files), sorted(tmp_files_after_script)) + def test_multipart_name_urlencoded_attribute(self): boundary = "------------------------d74496d66958873e" From 1defac9e0e150f9d7ad69b21795681c1c99eb84e Mon Sep 17 00:00:00 2001 From: Vadim Sadokhov Date: Fri, 13 Mar 2026 17:30:31 +0300 Subject: [PATCH 26/26] fix format --- runtime-light/server/http/init-functions.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/runtime-light/server/http/init-functions.cpp b/runtime-light/server/http/init-functions.cpp index dc28f7e3a6..d499cdb364 100644 --- a/runtime-light/server/http/init-functions.cpp +++ b/runtime-light/server/http/init-functions.cpp @@ -404,12 +404,12 @@ kphp::coro::task<> finalize_server() noexcept { } // fill headers http_response.http_response.headers.value.reserve(http_server_instance_st.headers().size()); - std::transform( - http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), std::back_inserter(http_response.http_response.headers.value), - [](const auto& header_entry) noexcept { - const auto& [name, value]{header_entry}; - return tl::httpHeaderEntry{.is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; - }); + std::transform(http_server_instance_st.headers().cbegin(), http_server_instance_st.headers().cend(), + std::back_inserter(http_response.http_response.headers.value), [](const auto& header_entry) noexcept { + const auto& [name, value]{header_entry}; + return tl::httpHeaderEntry{ + .is_sensitive = {}, .name = {.value = {name.data(), name.size()}}, .value = {.value = {value.data(), value.size()}}}; + }); http_server_instance_st.response_state = kphp::http::response_state::headers_sent; [[fallthrough]]; }