Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ if(ONE_INDEX OR ONE_SERVER)
add_subdirectory(src/shared)
add_subdirectory(src/gzip)
add_subdirectory(src/metapack)
add_subdirectory(src/search)
endif()

if(ONE_INDEX)
Expand Down Expand Up @@ -124,6 +125,7 @@ if(ONE_TESTS)
if(ONE_INDEX OR ONE_SERVER)
add_subdirectory(test/unit/gzip)
add_subdirectory(test/unit/metapack)
add_subdirectory(test/unit/search)
endif()

if(ONE_INDEX)
Expand Down
1 change: 1 addition & 0 deletions src/index/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ endif()
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::resolver)
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::shared)
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::metapack)
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::search)
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::configuration)
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::web)

Expand Down
56 changes: 18 additions & 38 deletions src/index/explorer.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <sourcemeta/one/configuration.h>
#include <sourcemeta/one/metapack.h>
#include <sourcemeta/one/resolver.h>
#include <sourcemeta/one/search.h>
#include <sourcemeta/one/shared.h>

#include <sourcemeta/core/json.h>
Expand Down Expand Up @@ -499,7 +500,7 @@ struct GENERATE_EXPLORER_SEARCH_INDEX {
const sourcemeta::one::Configuration &,
const sourcemeta::core::JSON &) -> void {
const auto timestamp_start{std::chrono::steady_clock::now()};
std::vector<sourcemeta::core::JSON> result;
std::vector<sourcemeta::one::SearchEntry> entries;

for (const auto &dependency : action.dependencies) {
const auto directory_option{
Expand All @@ -515,48 +516,27 @@ struct GENERATE_EXPLORER_SEARCH_INDEX {
continue;
}

auto entry{sourcemeta::core::JSON::make_array()};
entry.push_back(
sourcemeta::core::JSON{directory_entry.at("path").to_string()});
entry.push_back(directory_entry.defines("title")
? directory_entry.at("title")
: sourcemeta::core::JSON{""});
entry.push_back(directory_entry.defines("description")
? directory_entry.at("description")
: sourcemeta::core::JSON{""});
result.push_back(std::move(entry));
entries.push_back({directory_entry.at("path").to_string(),
directory_entry.defines("title")
? directory_entry.at("title").to_string()
: "",
directory_entry.defines("description")
? directory_entry.at("description").to_string()
: ""});
}
}

std::sort(result.begin(), result.end(),
[](const sourcemeta::core::JSON &left,
const sourcemeta::core::JSON &right) {
assert(left.is_array() && left.size() == 3);
assert(right.is_array() && right.size() == 3);

// Prioritise entries that have more meta-data filled in
const auto left_score = (!left.at(1).empty() ? 1 : 0) +
(!left.at(2).empty() ? 1 : 0);
const auto right_score = (!right.at(1).empty() ? 1 : 0) +
(!right.at(2).empty() ? 1 : 0);
if (left_score != right_score) {
return left_score > right_score;
}

// Otherwise revert to lexicographic comparisons
// TODO: Ideally we sort based on schema health too, given
// lint results
if (left_score > 0) {
return left.at(0).to_string() < right.at(0).to_string();
}

return false;
});

const auto payload{sourcemeta::one::make_search(std::move(entries))};
const auto timestamp_end{std::chrono::steady_clock::now()};

sourcemeta::one::metapack_write_jsonl(
action.destination, result, "application/jsonl",
const std::string_view payload_view{
payload.empty()
? std::string_view{}
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
: std::string_view{reinterpret_cast<const char *>(payload.data()),
payload.size()}};
sourcemeta::one::metapack_write_text(
action.destination, payload_view, "application/jsonl",
// We don't want to compress this one so we can
// quickly skim through it while streaming it
sourcemeta::one::MetapackEncoding::Identity, {},
Expand Down
8 changes: 0 additions & 8 deletions src/metapack/include/sourcemeta/one/metapack.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include <optional> // std::optional
#include <span> // std::span
#include <string_view> // std::string_view
#include <vector> // std::vector

namespace sourcemeta::one {

Expand Down Expand Up @@ -71,13 +70,6 @@ auto metapack_write_text(const std::filesystem::path &destination,
std::span<const std::uint8_t> extension,
std::chrono::milliseconds duration) -> void;

// Write `entries` to `destination` as a metapack file whose payload is
// newline-delimited JSON: every entry is stringified and followed by a
// single '\n'. The parent directory of `destination` is created if needed.
// `mime`, `encoding`, `extension` and `duration` are forwarded unchanged
// to the underlying metapack writer.
SOURCEMETA_ONE_METAPACK_EXPORT
auto metapack_write_jsonl(const std::filesystem::path &destination,
const std::vector<sourcemeta::core::JSON> &entries,
std::string_view mime, MetapackEncoding encoding,
std::span<const std::uint8_t> extension,
std::chrono::milliseconds duration) -> void;

SOURCEMETA_ONE_METAPACK_EXPORT
auto metapack_write_file(const std::filesystem::path &destination,
const std::filesystem::path &source,
Expand Down
17 changes: 0 additions & 17 deletions src/metapack/metapack.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,23 +127,6 @@ auto metapack_write_text(const std::filesystem::path &destination,
write_metapack(destination, mime, encoding, extension, duration, content);
}

// Serialise `entries` as newline-delimited JSON (one stringified document
// per line, each terminated by '\n') and store the result at `destination`
// as a metapack file. The remaining arguments are passed straight through
// to `write_metapack`.
auto metapack_write_jsonl(const std::filesystem::path &destination,
                          const std::vector<sourcemeta::core::JSON> &entries,
                          const std::string_view mime,
                          const MetapackEncoding encoding,
                          const std::span<const std::uint8_t> extension,
                          const std::chrono::milliseconds duration) -> void {
  // Build the whole payload in memory first
  std::ostringstream lines;
  for (const auto &document : entries) {
    sourcemeta::core::stringify(document, lines);
    lines << '\n';
  }

  // Only touch the filesystem once serialisation has succeeded
  const auto parent{destination.parent_path()};
  std::filesystem::create_directories(parent);

  const auto content{lines.str()};
  write_metapack(destination, mime, encoding, extension, duration, content);
}

auto metapack_write_file(const std::filesystem::path &destination,
const std::filesystem::path &source,
const std::string_view mime,
Expand Down
6 changes: 6 additions & 0 deletions src/search/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Declare the `search` library of the `one` project, built from search.cc.
# NOTE(review): `sourcemeta_library` is a project macro that is not visible
# here; judging by the calls below it yields the `sourcemeta_one_search`
# target -- confirm against the macro definition.
sourcemeta_library(NAMESPACE sourcemeta PROJECT one NAME search
SOURCES search.cc)

# JSON and I/O are PUBLIC: the public header search.h includes
# <sourcemeta/core/json.h> and <sourcemeta/core/io.h>.
target_link_libraries(sourcemeta_one_search PUBLIC sourcemeta::core::json)
target_link_libraries(sourcemeta_one_search PUBLIC sourcemeta::core::io)
# metapack is PRIVATE: only search.cc includes <sourcemeta/one/metapack.h>.
target_link_libraries(sourcemeta_one_search PRIVATE sourcemeta::one::metapack)
57 changes: 57 additions & 0 deletions src/search/include/sourcemeta/one/search.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#ifndef SOURCEMETA_ONE_SEARCH_H_
#define SOURCEMETA_ONE_SEARCH_H_

#ifndef SOURCEMETA_ONE_SEARCH_EXPORT
#include <sourcemeta/one/search_export.h>
#endif

#include <sourcemeta/core/io.h>
#include <sourcemeta/core/json.h>

#include <cstddef> // std::size_t
#include <cstdint> // std::uint8_t
#include <filesystem> // std::filesystem::path
#include <memory> // std::unique_ptr
#include <string> // std::string
#include <string_view> // std::string_view
#include <vector> // std::vector

namespace sourcemeta::one {

// One record of the search index. `make_search` serialises every entry as
// the JSON array [ path, title, description ], in that order.
struct SearchEntry {
// Path of the entry; also the tie-breaker when the index is sorted
std::string path;
// Title of the entry, or an empty string when there is none
std::string title;
// Description of the entry, or an empty string when there is none
std::string description;
};

// Build the search index payload from `entries`. The entries are sorted so
// that those with more metadata (non-empty title and/or description) come
// first, with ties ordered by path, and are then written as
// newline-delimited JSON arrays of the form [ path, title, description ].
// The serialised text is returned as raw bytes.
SOURCEMETA_ONE_SEARCH_EXPORT
auto make_search(std::vector<SearchEntry> &&entries)
-> std::vector<std::uint8_t>;

// Scan a newline-delimited payload (as produced by `make_search`) for lines
// that contain `query`, compared byte-wise and case-insensitively. Returns
// a JSON array with at most 10 objects, each holding the "path", "title"
// and "description" of a matching line, in payload order. `payload` may be
// a null pointer only when `payload_size` is 0, in which case the result is
// an empty array.
SOURCEMETA_ONE_SEARCH_EXPORT
auto search(const std::uint8_t *payload, std::size_t payload_size,
std::string_view query) -> sourcemeta::core::JSON;

// Search over a metapack file that holds a search index payload. The file
// is not opened at construction time: it is opened the first time `search`
// is called and kept open for the lifetime of the object.
// NOTE(review): no synchronisation is visible around the lazy open --
// confirm that callers do not issue the first `search` concurrently.
class SOURCEMETA_ONE_SEARCH_EXPORT SearchView {
public:
// Remember `path` for later; the file is not touched here
explicit SearchView(std::filesystem::path path);
~SearchView();

// Neither copyable nor movable
SearchView(const SearchView &) = delete;
SearchView(SearchView &&) = delete;
auto operator=(const SearchView &) -> SearchView & = delete;
auto operator=(SearchView &&) -> SearchView & = delete;

// Open the file if needed and run the free function `search` on its payload
auto search(std::string_view query) -> sourcemeta::core::JSON;

private:
// Location of the metapack file
std::filesystem::path path_;
// View over the file; null until the first search
std::unique_ptr<sourcemeta::core::FileView> view_;
// Start of the payload inside `view_`; stays null when the payload is empty
const std::uint8_t *payload_{nullptr};
// Number of payload bytes that follow the metapack header
std::size_t payload_size_{0};
// Opens the file and locates the payload on first use; no-op afterwards
auto ensure_open() -> void;
};

} // namespace sourcemeta::one

#endif
123 changes: 123 additions & 0 deletions src/search/search.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#include <sourcemeta/one/search.h>

#include <sourcemeta/one/metapack.h>

#include <algorithm> // std::ranges::search
#include <cassert> // assert
#include <cctype> // std::tolower
#include <sstream> // std::ostringstream
#include <utility> // std::move

namespace sourcemeta::one {

auto make_search(std::vector<SearchEntry> &&entries)
-> std::vector<std::uint8_t> {
// Prioritise entries that have more metadata filled in,
// then sort lexicographically by path
std::ranges::sort(entries, [](const SearchEntry &left,
const SearchEntry &right) {
const auto left_score =
(!left.title.empty() ? 1 : 0) + (!left.description.empty() ? 1 : 0);
const auto right_score =
(!right.title.empty() ? 1 : 0) + (!right.description.empty() ? 1 : 0);
if (left_score != right_score) {
return left_score > right_score;
}

// TODO: Ideally we sort based on schema health too, given
// lint results
return left.path < right.path;
});

std::ostringstream buffer;
for (const auto &entry : entries) {
auto json_entry{sourcemeta::core::JSON::make_array()};
json_entry.push_back(sourcemeta::core::JSON{entry.path});
json_entry.push_back(sourcemeta::core::JSON{entry.title});
json_entry.push_back(sourcemeta::core::JSON{entry.description});
sourcemeta::core::stringify(json_entry, buffer);
buffer << '\n';
}

const auto result{buffer.str()};
return {result.begin(), result.end()};
}

// Scan a newline-delimited search payload for lines that contain `query`.
//
// - `payload` / `payload_size`: the bytes produced by `make_search`, i.e.
//   one JSON array [ path, title, description ] per line. `payload` may be
//   a null pointer only when `payload_size` is 0.
// - `query`: the text to look for. The comparison is byte-wise and
//   case-insensitive (through `std::tolower`), and it runs against the raw
//   serialised line rather than against the decoded fields.
//
// Returns a JSON array of objects with the keys "path", "title" and
// "description", in payload order, holding at most 10 results.
auto search(const std::uint8_t *payload, const std::size_t payload_size,
            const std::string_view query) -> sourcemeta::core::JSON {
  constexpr auto MAXIMUM_SEARCH_COUNT{10};

  auto result{sourcemeta::core::JSON::make_array()};
  // This guard must come before the string view is built: callers are
  // allowed to pass a null pointer together with a size of 0, and the
  // pointer must never be handed to `std::string_view` in that case
  if (payload_size == 0) {
    return result;
  }

  assert(payload != nullptr);
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
  const std::string_view data{reinterpret_cast<const char *>(payload),
                              payload_size};

  std::size_t line_start{0};
  while (line_start < data.size()) {
    // A final line without a trailing newline still counts as a line
    auto line_end{data.find('\n', line_start)};
    if (line_end == std::string_view::npos) {
      line_end = data.size();
    }

    const auto line{data.substr(line_start, line_end - line_start)};
    line_start = line_end + 1;

    if (line.empty()) {
      continue;
    }

    // Filter on the raw text first so that only matching lines pay for
    // JSON parsing. An empty `query` makes `std::ranges::search` return an
    // empty subrange, so an empty query matches nothing
    if (std::ranges::search(line, query, [](const auto left, const auto right) {
          return std::tolower(static_cast<unsigned char>(left)) ==
                 std::tolower(static_cast<unsigned char>(right));
        }).empty()) {
      continue;
    }

    auto entry{sourcemeta::core::JSON::make_object()};
    const std::string line_string{line};
    auto line_json{sourcemeta::core::parse_json(line_string)};
    // Every line is expected to be [ path, title, description ]
    assert(line_json.is_array() && line_json.size() >= 3);
    entry.assign("path", std::move(line_json.at(0)));
    entry.assign("title", std::move(line_json.at(1)));
    entry.assign("description", std::move(line_json.at(2)));
    result.push_back(std::move(entry));

    if (result.array_size() >= MAXIMUM_SEARCH_COUNT) {
      break;
    }
  }

  return result;
}

// Only records where the index lives; the file itself is opened by
// `ensure_open` the first time a search runs
SearchView::SearchView(std::filesystem::path path) : path_{std::move(path)} {}

// Defined out of line in this translation unit rather than in the header
SearchView::~SearchView() = default;

// Open the metapack file at `path_` and locate its payload, unless that has
// already been done. In debug builds the path is asserted to be absolute
// and to exist, and the file is asserted to carry a metapack payload
// offset.
//
// Everything is staged in locals and the members are only written at the
// very end. If opening the file or resolving the payload offset throws,
// the object is left untouched and the next call tries again, instead of
// being marked as open with an empty payload.
auto SearchView::ensure_open() -> void {
  if (this->view_) {
    return;
  }

  assert(std::filesystem::exists(this->path_));
  assert(this->path_.is_absolute());

  auto view{std::make_unique<sourcemeta::core::FileView>(this->path_)};
  const auto payload_start_option{metapack_payload_offset(*view)};
  assert(payload_start_option.has_value());
  // Throws std::bad_optional_access when asserts are disabled and the file
  // has no payload offset
  const auto &payload_start{payload_start_option.value()};

  const std::size_t payload_size = view->size() - payload_start;
  // Leave the pointer null for an empty payload
  const std::uint8_t *payload{nullptr};
  if (payload_size > 0) {
    payload = view->as<std::uint8_t>(payload_start);
  }

  // Commit. Moving the owning pointer does not relocate the view object
  // itself, so `payload` keeps pointing into it
  this->payload_ = payload;
  this->payload_size_ = payload_size;
  this->view_ = std::move(view);
}

// Run `query` against the payload of the file this view was created for.
// The file is opened on the first call; the matching itself is delegated to
// the free function `sourcemeta::one::search`.
auto SearchView::search(const std::string_view query)
-> sourcemeta::core::JSON {
this->ensure_open();
return sourcemeta::one::search(this->payload_, this->payload_size_, query);
}

} // namespace sourcemeta::one
1 change: 1 addition & 0 deletions src/server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ target_link_libraries(sourcemeta_one_server PRIVATE uNetworking::uWebSockets)
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::gzip)
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::shared)
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::metapack)
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::search)
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::blaze::evaluator)
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::blaze::output)

Expand Down
Loading
Loading