Skip to content

Commit 8676745

Browse files
committed
Introduce a search module
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent 947760f commit 8676745

13 files changed

Lines changed: 391 additions & 136 deletions

File tree

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ if(ONE_INDEX OR ONE_SERVER)
6161
add_subdirectory(src/shared)
6262
add_subdirectory(src/gzip)
6363
add_subdirectory(src/metapack)
64+
add_subdirectory(src/search)
6465
endif()
6566

6667
if(ONE_INDEX)
@@ -124,6 +125,7 @@ if(ONE_TESTS)
124125
if(ONE_INDEX OR ONE_SERVER)
125126
add_subdirectory(test/unit/gzip)
126127
add_subdirectory(test/unit/metapack)
128+
add_subdirectory(test/unit/search)
127129
endif()
128130

129131
if(ONE_INDEX)

src/index/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ endif()
1414
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::resolver)
1515
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::shared)
1616
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::metapack)
17+
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::search)
1718
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::configuration)
1819
target_link_libraries(sourcemeta_one_index PRIVATE sourcemeta::one::web)
1920

src/index/explorer.h

Lines changed: 16 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <sourcemeta/one/configuration.h>
55
#include <sourcemeta/one/metapack.h>
66
#include <sourcemeta/one/resolver.h>
7+
#include <sourcemeta/one/search.h>
78
#include <sourcemeta/one/shared.h>
89

910
#include <sourcemeta/core/json.h>
@@ -499,7 +500,7 @@ struct GENERATE_EXPLORER_SEARCH_INDEX {
499500
const sourcemeta::one::Configuration &,
500501
const sourcemeta::core::JSON &) -> void {
501502
const auto timestamp_start{std::chrono::steady_clock::now()};
502-
std::vector<sourcemeta::core::JSON> result;
503+
std::vector<sourcemeta::one::SearchEntry> entries;
503504

504505
for (const auto &dependency : action.dependencies) {
505506
const auto directory_option{
@@ -515,48 +516,25 @@ struct GENERATE_EXPLORER_SEARCH_INDEX {
515516
continue;
516517
}
517518

518-
auto entry{sourcemeta::core::JSON::make_array()};
519-
entry.push_back(
520-
sourcemeta::core::JSON{directory_entry.at("path").to_string()});
521-
entry.push_back(directory_entry.defines("title")
522-
? directory_entry.at("title")
523-
: sourcemeta::core::JSON{""});
524-
entry.push_back(directory_entry.defines("description")
525-
? directory_entry.at("description")
526-
: sourcemeta::core::JSON{""});
527-
result.push_back(std::move(entry));
519+
entries.push_back(
520+
{directory_entry.at("path").to_string(),
521+
directory_entry.defines("title")
522+
? directory_entry.at("title").to_string()
523+
: "",
524+
directory_entry.defines("description")
525+
? directory_entry.at("description").to_string()
526+
: ""});
528527
}
529528
}
530529

531-
std::sort(result.begin(), result.end(),
532-
[](const sourcemeta::core::JSON &left,
533-
const sourcemeta::core::JSON &right) {
534-
assert(left.is_array() && left.size() == 3);
535-
assert(right.is_array() && right.size() == 3);
536-
537-
// Prioritise entries that have more meta-data filled in
538-
const auto left_score = (!left.at(1).empty() ? 1 : 0) +
539-
(!left.at(2).empty() ? 1 : 0);
540-
const auto right_score = (!right.at(1).empty() ? 1 : 0) +
541-
(!right.at(2).empty() ? 1 : 0);
542-
if (left_score != right_score) {
543-
return left_score > right_score;
544-
}
545-
546-
// Otherwise revert to lexicographic comparisons
547-
// TODO: Ideally we sort based on schema health too, given
548-
// lint results
549-
if (left_score > 0) {
550-
return left.at(0).to_string() < right.at(0).to_string();
551-
}
552-
553-
return false;
554-
});
555-
530+
const auto payload{sourcemeta::one::make_search(std::move(entries))};
556531
const auto timestamp_end{std::chrono::steady_clock::now()};
557532

558-
sourcemeta::one::metapack_write_jsonl(
559-
action.destination, result, "application/jsonl",
533+
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
534+
const std::string_view payload_view{
535+
reinterpret_cast<const char *>(payload.data()), payload.size()};
536+
sourcemeta::one::metapack_write_text(
537+
action.destination, payload_view, "application/jsonl",
560538
// We don't want to compress this one so we can
561539
// quickly skim through it while streaming it
562540
sourcemeta::one::MetapackEncoding::Identity, {},

src/metapack/include/sourcemeta/one/metapack.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include <optional> // std::optional
1616
#include <span> // std::span
1717
#include <string_view> // std::string_view
18-
#include <vector> // std::vector
1918

2019
namespace sourcemeta::one {
2120

@@ -71,13 +70,6 @@ auto metapack_write_text(const std::filesystem::path &destination,
7170
std::span<const std::uint8_t> extension,
7271
std::chrono::milliseconds duration) -> void;
7372

74-
SOURCEMETA_ONE_METAPACK_EXPORT
75-
auto metapack_write_jsonl(const std::filesystem::path &destination,
76-
const std::vector<sourcemeta::core::JSON> &entries,
77-
std::string_view mime, MetapackEncoding encoding,
78-
std::span<const std::uint8_t> extension,
79-
std::chrono::milliseconds duration) -> void;
80-
8173
SOURCEMETA_ONE_METAPACK_EXPORT
8274
auto metapack_write_file(const std::filesystem::path &destination,
8375
const std::filesystem::path &source,

src/metapack/metapack.cc

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -127,23 +127,6 @@ auto metapack_write_text(const std::filesystem::path &destination,
127127
write_metapack(destination, mime, encoding, extension, duration, content);
128128
}
129129

130-
auto metapack_write_jsonl(const std::filesystem::path &destination,
131-
const std::vector<sourcemeta::core::JSON> &entries,
132-
const std::string_view mime,
133-
const MetapackEncoding encoding,
134-
const std::span<const std::uint8_t> extension,
135-
const std::chrono::milliseconds duration) -> void {
136-
std::ostringstream buffer;
137-
for (const auto &entry : entries) {
138-
sourcemeta::core::stringify(entry, buffer);
139-
buffer << '\n';
140-
}
141-
142-
std::filesystem::create_directories(destination.parent_path());
143-
write_metapack(destination, mime, encoding, extension, duration,
144-
buffer.str());
145-
}
146-
147130
auto metapack_write_file(const std::filesystem::path &destination,
148131
const std::filesystem::path &source,
149132
const std::string_view mime,

src/search/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Declares the `search` library of the `one` project, built from search.cc.
sourcemeta_library(NAMESPACE sourcemeta PROJECT one NAME search
2+
SOURCES search.cc)
3+
4+
# The core JSON and IO libraries are linked PUBLIC: the search.h header
# shown in this commit includes <sourcemeta/core/json.h> and
# <sourcemeta/core/io.h>, so consumers of the header need them too.
target_link_libraries(sourcemeta_one_search PUBLIC sourcemeta::core::json)
5+
target_link_libraries(sourcemeta_one_search PUBLIC sourcemeta::core::io)
6+
# metapack is linked PRIVATE: only search.cc includes its header.
target_link_libraries(sourcemeta_one_search PRIVATE sourcemeta::one::metapack)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#ifndef SOURCEMETA_ONE_SEARCH_H_
2+
#define SOURCEMETA_ONE_SEARCH_H_
3+
4+
#ifndef SOURCEMETA_ONE_SEARCH_EXPORT
5+
#include <sourcemeta/one/search_export.h>
6+
#endif
7+
8+
#include <sourcemeta/core/io.h>
9+
#include <sourcemeta/core/json.h>
10+
11+
#include <cstddef> // std::size_t
12+
#include <cstdint> // std::uint8_t
13+
#include <filesystem> // std::filesystem::path
14+
#include <memory> // std::unique_ptr
15+
#include <string> // std::string
16+
#include <string_view> // std::string_view
17+
#include <vector> // std::vector
18+
19+
namespace sourcemeta::one {
20+
21+
/// One record of the search index, as consumed by `make_search`.
struct SearchEntry {
22+
// Path of the entry. Used as the tie-breaker when ranking entries that
// carry the same amount of metadata.
std::string path;
23+
// Title of the entry. An empty string counts as "no title" for ranking.
std::string title;
24+
// Description of the entry. An empty string counts as "no description"
// for ranking.
std::string description;
25+
};
26+
27+
/// Rank the given entries and serialise them into a search index.
///
/// The entries are sorted in place (hence the rvalue reference) and then
/// written as one JSON array `[path, title, description]` per line, each
/// line terminated by a newline. The bytes of that text are returned.
SOURCEMETA_ONE_SEARCH_EXPORT
28+
auto make_search(std::vector<SearchEntry> &&entries)
29+
-> std::vector<std::uint8_t>;
30+
31+
/// Query a search index held in memory.
///
/// `payload` / `payload_size` delimit newline-separated JSON array lines in
/// the format produced by `make_search`. Each line is matched against
/// `query` case-insensitively; matching lines are returned as a JSON array
/// of objects with `path`, `title` and `description` properties. The scan
/// stops once 10 results have been collected.
SOURCEMETA_ONE_SEARCH_EXPORT
32+
auto search(const std::uint8_t *payload, std::size_t payload_size,
33+
std::string_view query) -> sourcemeta::core::JSON;
34+
35+
/// A lazily-opened view over a search index stored in a metapack file.
///
/// The file is not touched at construction time; it is opened on the first
/// call to `search` and kept open for the lifetime of the object.
class SOURCEMETA_ONE_SEARCH_EXPORT SearchView {
36+
public:
37+
/// Remember the location of the index file. Nothing is opened yet.
explicit SearchView(std::filesystem::path path);
38+
~SearchView();
39+
40+
// Neither copyable nor movable: `payload_` points into the storage owned
// by `view_`.
SearchView(const SearchView &) = delete;
41+
SearchView(SearchView &&) = delete;
42+
auto operator=(const SearchView &) -> SearchView & = delete;
43+
auto operator=(SearchView &&) -> SearchView & = delete;
44+
45+
/// Open the index if necessary and run `query` against its payload.
/// Non-const because the first call populates the members below.
auto search(std::string_view query) -> sourcemeta::core::JSON;
46+
47+
private:
48+
// Location of the index file, as given to the constructor
std::filesystem::path path_;
49+
// The opened file; null until the first search
std::unique_ptr<sourcemeta::core::FileView> view_;
50+
// Start of the payload inside `view_`, past the metapack header
const std::uint8_t *payload_{nullptr};
51+
// Number of payload bytes starting at `payload_`
std::size_t payload_size_{0};
52+
// Opens `path_` and fills in the members above; no-op once opened
auto ensure_open() -> void;
53+
};
54+
55+
} // namespace sourcemeta::one
56+
57+
#endif

src/search/search.cc

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#include <sourcemeta/one/search.h>
2+
3+
#include <sourcemeta/one/metapack.h>
4+
5+
#include <algorithm> // std::ranges::search
6+
#include <cassert> // assert
7+
#include <cctype> // std::tolower
8+
#include <sstream> // std::ostringstream
9+
#include <utility> // std::move
10+
11+
namespace sourcemeta::one {
12+
13+
auto make_search(std::vector<SearchEntry> &&entries)
14+
-> std::vector<std::uint8_t> {
15+
// Prioritise entries that have more metadata filled in,
16+
// then sort lexicographically by path
17+
std::ranges::sort(entries,
18+
[](const SearchEntry &left, const SearchEntry &right) {
19+
const auto left_score =
20+
(!left.title.empty() ? 1 : 0) +
21+
(!left.description.empty() ? 1 : 0);
22+
const auto right_score =
23+
(!right.title.empty() ? 1 : 0) +
24+
(!right.description.empty() ? 1 : 0);
25+
if (left_score != right_score) {
26+
return left_score > right_score;
27+
}
28+
29+
// TODO: Ideally we sort based on schema health too, given
30+
// lint results
31+
if (left_score > 0) {
32+
return left.path < right.path;
33+
}
34+
35+
return false;
36+
});
37+
38+
std::ostringstream buffer;
39+
for (const auto &entry : entries) {
40+
auto json_entry{sourcemeta::core::JSON::make_array()};
41+
json_entry.push_back(sourcemeta::core::JSON{entry.path});
42+
json_entry.push_back(sourcemeta::core::JSON{entry.title});
43+
json_entry.push_back(sourcemeta::core::JSON{entry.description});
44+
sourcemeta::core::stringify(json_entry, buffer);
45+
buffer << '\n';
46+
}
47+
48+
const auto result{buffer.str()};
49+
return {result.begin(), result.end()};
50+
}
51+
52+
/// Query a search index held in memory.
///
/// The payload is expected to be in the format produced by `make_search`:
/// newline-separated JSON arrays of the form `[path, title, description]`.
/// Lines are visited in order, so the ranking chosen at index time is
/// preserved in the result.
///
/// NOTE: the query is matched against the raw serialised line, not against
/// the decoded fields, so JSON punctuation and escape sequences take part in
/// the match. Case folding is per byte (`std::tolower`), which in practice
/// means ASCII only.
///
/// @param payload       Start of the index text.
/// @param payload_size  Number of bytes available at `payload`.
/// @param query         Text to look for. An empty query matches nothing.
/// @return A JSON array with at most 10 objects, each with `path`, `title`
///         and `description` properties.
auto search(const std::uint8_t *payload, const std::size_t payload_size,
            const std::string_view query) -> sourcemeta::core::JSON {
  constexpr std::size_t MAXIMUM_SEARCH_COUNT{10};

  auto result{sourcemeta::core::JSON::make_array()};

  // `std::ranges::search` reports an empty match for an empty needle, which
  // the loop below treats as "no match" on every line. Return the same
  // empty result without walking the payload.
  if (query.empty()) {
    return result;
  }

  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
  const std::string_view data{reinterpret_cast<const char *>(payload),
                              payload_size};

  // `std::tolower` has undefined behaviour for negative arguments other
  // than EOF, and plain `char` may be signed, so bytes of multi-byte UTF-8
  // sequences must go through `unsigned char` first
  const auto equals_ignoring_case{[](const char left, const char right) {
    return std::tolower(static_cast<unsigned char>(left)) ==
           std::tolower(static_cast<unsigned char>(right));
  }};

  std::size_t line_start{0};
  while (line_start < data.size()) {
    auto line_end{data.find('\n', line_start)};
    if (line_end == std::string_view::npos) {
      // The last line is allowed to lack a trailing newline
      line_end = data.size();
    }

    const auto line{data.substr(line_start, line_end - line_start)};
    line_start = line_end + 1;

    if (line.empty()) {
      continue;
    }

    if (std::ranges::search(line, query, equals_ignoring_case).empty()) {
      continue;
    }

    // Only lines that matched are parsed, keeping the scan cheap
    const std::string line_string{line};
    auto line_json{sourcemeta::core::parse_json(line_string)};
    assert(line_json.is_array() && line_json.size() == 3);

    auto entry{sourcemeta::core::JSON::make_object()};
    entry.assign("path", std::move(line_json.at(0)));
    entry.assign("title", std::move(line_json.at(1)));
    entry.assign("description", std::move(line_json.at(2)));
    result.push_back(std::move(entry));

    if (result.array_size() >= MAXIMUM_SEARCH_COUNT) {
      break;
    }
  }

  return result;
}
95+
96+
// Only records where the index lives. The file itself is opened on demand
// by `ensure_open`, so constructing a view performs no I/O.
SearchView::SearchView(std::filesystem::path path) : path_{std::move(path)} {}
97+
98+
// Nothing to do by hand: `view_` is a `std::unique_ptr` and releases the
// opened file, if any, on its own.
SearchView::~SearchView() = default;
99+
100+
auto SearchView::ensure_open() -> void {
101+
if (this->view_) {
102+
return;
103+
}
104+
105+
assert(std::filesystem::exists(this->path_));
106+
assert(this->path_.is_absolute());
107+
this->view_ = std::make_unique<sourcemeta::core::FileView>(this->path_);
108+
const auto payload_start_option{metapack_payload_offset(*this->view_)};
109+
assert(payload_start_option.has_value());
110+
const auto &payload_start{payload_start_option.value()};
111+
this->payload_size_ = this->view_->size() - payload_start;
112+
this->payload_ = this->view_->as<std::uint8_t>(payload_start);
113+
}
114+
115+
// Runs `query` against the index file, opening it first if this is the
// first search. The actual matching is delegated to the free function
// `sourcemeta::one::search`, which works on the raw payload bytes.
auto SearchView::search(const std::string_view query)
116+
-> sourcemeta::core::JSON {
117+
this->ensure_open();
118+
return sourcemeta::one::search(this->payload_, this->payload_size_, query);
119+
}
120+
121+
} // namespace sourcemeta::one

src/server/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ target_link_libraries(sourcemeta_one_server PRIVATE uNetworking::uWebSockets)
2323
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::gzip)
2424
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::shared)
2525
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::metapack)
26+
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::one::search)
2627
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::blaze::evaluator)
2728
target_link_libraries(sourcemeta_one_server PRIVATE sourcemeta::blaze::output)
2829

0 commit comments

Comments
 (0)