diff --git a/src/index/explorer.h b/src/index/explorer.h index 09262ea3..7ccd7636 100644 --- a/src/index/explorer.h +++ b/src/index/explorer.h @@ -540,7 +540,7 @@ struct GENERATE_EXPLORER_SEARCH_INDEX { : std::string_view{reinterpret_cast(payload.data()), payload.size()}}; sourcemeta::one::metapack_write_text( - action.destination, payload_view, "application/jsonl", + action.destination, payload_view, "application/octet-stream", // We don't want to compress this one so we can // quickly skim through it while streaming it sourcemeta::one::MetapackEncoding::Identity, {}, diff --git a/src/search/include/sourcemeta/one/search.h b/src/search/include/sourcemeta/one/search.h index 1c29ac13..22841d4b 100644 --- a/src/search/include/sourcemeta/one/search.h +++ b/src/search/include/sourcemeta/one/search.h @@ -25,6 +25,19 @@ struct SearchEntry { std::uint8_t health; }; +#pragma pack(push, 1) +struct SearchIndexHeader { + std::uint32_t entry_count; + std::uint32_t records_offset; +}; + +struct SearchRecordHeader { + std::uint16_t path_length; + std::uint16_t title_length; + std::uint16_t description_length; +}; +#pragma pack(pop) + SOURCEMETA_ONE_SEARCH_EXPORT auto make_search(std::vector &&entries) -> std::vector; diff --git a/src/search/search.cc b/src/search/search.cc index b85e1a5d..4151f9f3 100644 --- a/src/search/search.cc +++ b/src/search/search.cc @@ -5,7 +5,8 @@ #include // std::ranges::search #include // assert #include // std::tolower -#include // std::ostringstream +#include // std::memcpy +#include // std::numeric_limits #include // std::move namespace sourcemeta::one { @@ -29,59 +30,155 @@ auto make_search(std::vector &&entries) return left.path < right.path; }); - std::ostringstream buffer; + constexpr auto MAX_FIELD_LENGTH{ + static_cast(std::numeric_limits::max())}; + std::erase_if(entries, [](const SearchEntry &entry) { + return entry.path.size() > MAX_FIELD_LENGTH || + entry.title.size() > MAX_FIELD_LENGTH || + entry.description.size() > MAX_FIELD_LENGTH; + }); + + if (entries.empty()) { + return {}; + } + + const auto entry_count{static_cast(entries.size())}; + + // Compute total payload size + std::size_t total_size{sizeof(SearchIndexHeader) + + entry_count * sizeof(std::uint32_t)}; for (const auto &entry : entries) { - auto json_entry{sourcemeta::core::JSON::make_array()}; - json_entry.push_back(sourcemeta::core::JSON{entry.path}); - json_entry.push_back(sourcemeta::core::JSON{entry.title}); - json_entry.push_back(sourcemeta::core::JSON{entry.description}); - sourcemeta::core::stringify(json_entry, buffer); - buffer << '\n'; + total_size += sizeof(SearchRecordHeader) + entry.path.size() + + entry.title.size() + entry.description.size(); } - const auto result{buffer.str()}; - return {result.begin(), result.end()}; + std::vector payload(total_size); + const auto records_offset{static_cast( + sizeof(SearchIndexHeader) + entry_count * sizeof(std::uint32_t))}; + + // Write header + SearchIndexHeader header{.entry_count = entry_count, + .records_offset = records_offset}; + std::memcpy(payload.data(), &header, sizeof(SearchIndexHeader)); + + // Write records and fill offset table + auto *offset_table{payload.data() + sizeof(SearchIndexHeader)}; + std::size_t record_position{records_offset}; + for (std::uint32_t entry_index{0}; entry_index < entry_count; ++entry_index) { + const auto &entry{entries[entry_index]}; + + // Write this record's offset into the table + const auto record_offset{static_cast(record_position)}; + std::memcpy(offset_table + entry_index * sizeof(std::uint32_t), + &record_offset, sizeof(std::uint32_t)); + + // Write record header + const SearchRecordHeader record_header{ + .path_length = static_cast(entry.path.size()), + .title_length = static_cast(entry.title.size()), + .description_length = + static_cast(entry.description.size())}; + std::memcpy(payload.data() + record_position, &record_header, + sizeof(SearchRecordHeader)); + record_position += sizeof(SearchRecordHeader); + + // Write field data + std::memcpy(payload.data() + record_position, entry.path.data(), + entry.path.size()); + record_position += entry.path.size(); + std::memcpy(payload.data() + record_position, entry.title.data(), + entry.title.size()); + record_position += entry.title.size(); + std::memcpy(payload.data() + record_position, entry.description.data(), + entry.description.size()); + record_position += entry.description.size(); + } + + assert(record_position == total_size); + return payload; +} + +static auto case_insensitive_contains(const std::string_view haystack, + const std::string_view needle) -> bool { + return !std::ranges::search( + haystack, needle, + [](const auto left, const auto right) { + return std::tolower(static_cast(left)) == + std::tolower(static_cast(right)); + }) + .empty(); } auto search(const std::uint8_t *payload, const std::size_t payload_size, const std::string_view query) -> sourcemeta::core::JSON { auto result{sourcemeta::core::JSON::make_array()}; - if (payload_size == 0) { + if (payload == nullptr || payload_size < sizeof(SearchIndexHeader)) { return result; } - assert(payload != nullptr); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - const std::string_view data{reinterpret_cast(payload), - payload_size}; - - std::size_t line_start{0}; - while (line_start < data.size()) { - auto line_end{data.find('\n', line_start)}; - if (line_end == std::string_view::npos) { - line_end = data.size(); + const auto *header{reinterpret_cast(payload)}; + + if (header->entry_count == 0) { + return result; + } + + const auto offset_table_end{sizeof(SearchIndexHeader) + + static_cast(header->entry_count) * + sizeof(std::uint32_t)}; + if (offset_table_end > payload_size) { + return result; + } + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const auto *offset_table{reinterpret_cast( + payload + sizeof(SearchIndexHeader))}; + + for (std::uint32_t entry_index{0}; entry_index < header->entry_count; + ++entry_index) { + const auto record_offset{offset_table[entry_index]}; + if (record_offset + sizeof(SearchRecordHeader) > payload_size) { + break; } - const auto line{data.substr(line_start, line_end - line_start)}; - line_start = line_end + 1; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const auto *record_header{ + reinterpret_cast(payload + record_offset)}; - if (line.empty()) { - continue; + const auto field_data_offset{record_offset + sizeof(SearchRecordHeader)}; + const auto total_field_length{ + static_cast(record_header->path_length) + + record_header->title_length + record_header->description_length}; + if (field_data_offset + total_field_length > payload_size) { + break; } - if (std::ranges::search(line, query, [](const auto left, const auto right) { - return std::tolower(static_cast(left)) == - std::tolower(static_cast(right)); - }).empty()) { + const auto *field_data{payload + field_data_offset}; + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const std::string_view path{reinterpret_cast(field_data), + record_header->path_length}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const std::string_view title{ + reinterpret_cast(field_data + record_header->path_length), + record_header->title_length}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const std::string_view description{ + reinterpret_cast(field_data + record_header->path_length + + record_header->title_length), + record_header->description_length}; + + if (!case_insensitive_contains(path, query) && + !case_insensitive_contains(title, query) && + !case_insensitive_contains(description, query)) { continue; } auto entry{sourcemeta::core::JSON::make_object()}; - const std::string line_string{line}; - auto line_json{sourcemeta::core::parse_json(line_string)}; - entry.assign("path", std::move(line_json.at(0))); - entry.assign("title", std::move(line_json.at(1))); - entry.assign("description", std::move(line_json.at(2))); + entry.assign("path", sourcemeta::core::JSON{std::string{path}}); + entry.assign("title", sourcemeta::core::JSON{std::string{title}}); + entry.assign("description", + sourcemeta::core::JSON{std::string{description}}); result.push_back(std::move(entry)); constexpr auto MAXIMUM_SEARCH_COUNT{10}; diff --git a/test/cli/index/common/search-index-nested-rebuild.sh b/test/cli/index/common/search-index-nested-rebuild.sh index 8c795458..8aabcc54 100755 --- a/test/cli/index/common/search-index-nested-rebuild.sh +++ b/test/cli/index/common/search-index-nested-rebuild.sh @@ -53,17 +53,17 @@ EOF SEARCH="$TMP/output/explorer/%/search.metapack" -extract_search_entries() { - strings "$1" | grep '^\[' | LC_ALL=C sort +extract_search_paths() { + strings "$1" | grep '^/' | LC_ALL=C sort } # Run 1: full build with two schemas in separate directories "$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 > /dev/null 2>&1 -extract_search_entries "$SEARCH" > "$TMP/search_actual.txt" +extract_search_paths "$SEARCH" > "$TMP/search_actual.txt" cat << 'EOF' > "$TMP/search_expected.txt" -["/left/schemas/a","",""] -["/right/schemas/b","",""] +/left/schemas/a +/right/schemas/b EOF diff "$TMP/search_actual.txt" "$TMP/search_expected.txt" @@ -78,10 +78,10 @@ EOF "$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 > /dev/null 2>&1 -extract_search_entries "$SEARCH" > "$TMP/search_actual.txt" +extract_search_paths "$SEARCH" > "$TMP/search_actual.txt" cat << 'EOF' > "$TMP/search_expected.txt" -["/left/schemas/a","",""] -["/right/schemas/b","",""] +/left/schemas/a +/right/schemas/b EOF diff "$TMP/search_actual.txt" "$TMP/search_expected.txt" @@ -95,10 +95,10 @@ EOF "$1" --skip-banner "$TMP/one.json" "$TMP/output" --concurrency 1 > /dev/null 2>&1 -extract_search_entries "$SEARCH" > "$TMP/search_actual.txt" +extract_search_paths "$SEARCH" > "$TMP/search_actual.txt" cat << 'EOF' > "$TMP/search_expected.txt" -["/left/schemas/a","",""] -["/right/schemas/b","",""] -["/right/schemas/c","",""] +/left/schemas/a +/right/schemas/b +/right/schemas/c EOF diff "$TMP/search_actual.txt" "$TMP/search_expected.txt" diff --git a/test/unit/search/CMakeLists.txt b/test/unit/search/CMakeLists.txt index 2cf1db8b..1aedd134 100644 --- a/test/unit/search/CMakeLists.txt +++ b/test/unit/search/CMakeLists.txt @@ -1,5 +1,5 @@ sourcemeta_googletest(NAMESPACE sourcemeta PROJECT one NAME search - SOURCES search_test.cc) + SOURCES search_build_test.cc search_query_test.cc) target_link_libraries(sourcemeta_one_search_unit PRIVATE sourcemeta::one::search) diff --git a/test/unit/search/search_build_test.cc b/test/unit/search/search_build_test.cc new file mode 100644 index 00000000..3bb35fa7 --- /dev/null +++ b/test/unit/search/search_build_test.cc @@ -0,0 +1,186 @@ +#include + +#include + +#include // std::memcpy +#include // std::string +#include // std::move +#include // std::vector + +TEST(Search_build, empty) { + std::vector entries; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + EXPECT_TRUE(payload.empty()); +} + +TEST(Search_build, single_entry) { + std::vector entries{ + {"/foo/bar", "My Title", "A description", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + EXPECT_FALSE(payload.empty()); + EXPECT_GE(payload.size(), sizeof(sourcemeta::one::SearchIndexHeader)); +} + +TEST(Search_build, header_single_entry) { + std::vector entries{ + {"/foo", "Title", "Desc", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + + sourcemeta::one::SearchIndexHeader header{}; + std::memcpy(&header, payload.data(), + sizeof(sourcemeta::one::SearchIndexHeader)); + EXPECT_EQ(header.entry_count, 1); + EXPECT_EQ(header.records_offset, + sizeof(sourcemeta::one::SearchIndexHeader) + sizeof(std::uint32_t)); +} + +TEST(Search_build, header_multiple_entries) { + std::vector entries{{"/a", "A", "Desc A", 80}, + {"/b", "B", "Desc B", 80}, + {"/c", "C", "Desc C", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + + sourcemeta::one::SearchIndexHeader header{}; + std::memcpy(&header, payload.data(), + sizeof(sourcemeta::one::SearchIndexHeader)); + EXPECT_EQ(header.entry_count, 3); + EXPECT_EQ(header.records_offset, sizeof(sourcemeta::one::SearchIndexHeader) + + 3 * sizeof(std::uint32_t)); +} + +TEST(Search_build, offset_table) { + std::vector entries{{"/a", "A", "D", 80}, + {"/b", "BB", "DD", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + + sourcemeta::one::SearchIndexHeader header{}; + std::memcpy(&header, payload.data(), + sizeof(sourcemeta::one::SearchIndexHeader)); + + std::uint32_t offset_first{0}; + std::uint32_t offset_second{0}; + std::memcpy(&offset_first, + payload.data() + sizeof(sourcemeta::one::SearchIndexHeader), + sizeof(std::uint32_t)); + std::memcpy(&offset_second, + payload.data() + sizeof(sourcemeta::one::SearchIndexHeader) + + sizeof(std::uint32_t), + sizeof(std::uint32_t)); + + EXPECT_EQ(offset_first, header.records_offset); + const auto first_record_size{sizeof(sourcemeta::one::SearchRecordHeader) + 2 + + 1 + 1}; + EXPECT_EQ(offset_second, offset_first + first_record_size); +} + +TEST(Search_build, record_fields) { + std::vector entries{ + {"/test/path", "My Title", "My Description", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + + sourcemeta::one::SearchIndexHeader header{}; + std::memcpy(&header, payload.data(), + sizeof(sourcemeta::one::SearchIndexHeader)); + + sourcemeta::one::SearchRecordHeader record_header{}; + std::memcpy(&record_header, payload.data() + header.records_offset, + sizeof(sourcemeta::one::SearchRecordHeader)); + EXPECT_EQ(record_header.path_length, 10); + EXPECT_EQ(record_header.title_length, 8); + EXPECT_EQ(record_header.description_length, 14); + + const auto *field_data{payload.data() + header.records_offset + + sizeof(sourcemeta::one::SearchRecordHeader)}; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const std::string path(reinterpret_cast(field_data), + record_header.path_length); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const std::string title( + reinterpret_cast(field_data + record_header.path_length), + record_header.title_length); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + const std::string description( + reinterpret_cast(field_data + record_header.path_length + + record_header.title_length), + record_header.description_length); + EXPECT_EQ(path, "/test/path"); + EXPECT_EQ(title, "My Title"); + EXPECT_EQ(description, "My Description"); +} + +TEST(Search_build, total_size) { + std::vector entries{{"/a", "T", "D", 80}, + {"/bb", "TT", "DD", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + + const auto expected_size{ + sizeof(sourcemeta::one::SearchIndexHeader) + 2 * sizeof(std::uint32_t) + + sizeof(sourcemeta::one::SearchRecordHeader) + 2 + 1 + 1 + + sizeof(sourcemeta::one::SearchRecordHeader) + 3 + 2 + 2}; + EXPECT_EQ(payload.size(), expected_size); +} + +TEST(Search_build, skips_entry_with_oversized_path) { + const std::string oversized_path(70000, 'x'); + std::vector entries{ + {oversized_path, "Title", "Desc", 80}, {"/normal", "Normal", "Desc", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + + sourcemeta::one::SearchIndexHeader header{}; + std::memcpy(&header, payload.data(), + sizeof(sourcemeta::one::SearchIndexHeader)); + EXPECT_EQ(header.entry_count, 1); +} + +TEST(Search_build, skips_entry_with_oversized_title) { + const std::string oversized_title(70000, 'x'); + std::vector entries{ + {"/foo", oversized_title, "Desc", 80}, {"/normal", "Normal", "Desc", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + + sourcemeta::one::SearchIndexHeader header{}; + std::memcpy(&header, payload.data(), + sizeof(sourcemeta::one::SearchIndexHeader)); + EXPECT_EQ(header.entry_count, 1); +} + +TEST(Search_build, skips_entry_with_oversized_description) { + const std::string oversized_description(70000, 'x'); + std::vector entries{ + {"/foo", "Title", oversized_description, 80}, + {"/normal", "Normal", "Desc", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + + sourcemeta::one::SearchIndexHeader header{}; + std::memcpy(&header, payload.data(), + sizeof(sourcemeta::one::SearchIndexHeader)); + EXPECT_EQ(header.entry_count, 1); +} + +TEST(Search_build, all_entries_oversized_returns_empty) { + const std::string oversized(70000, 'x'); + std::vector entries{ + {oversized, "Title", "Desc", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + EXPECT_TRUE(payload.empty()); +} + +TEST(Search_build, entry_at_exact_uint16_max_is_kept) { + const std::string max_path(65535, 'a'); + std::vector entries{{max_path, "", "", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + EXPECT_FALSE(payload.empty()); + + sourcemeta::one::SearchIndexHeader header{}; + std::memcpy(&header, payload.data(), + sizeof(sourcemeta::one::SearchIndexHeader)); + EXPECT_EQ(header.entry_count, 1); +} + +TEST(Search_build, entry_at_uint16_max_plus_one_is_skipped) { + const std::string too_long_path(65536, 'a'); + std::vector entries{ + {too_long_path, "", "", 80}}; + const auto payload{sourcemeta::one::make_search(std::move(entries))}; + EXPECT_TRUE(payload.empty()); +} diff --git a/test/unit/search/search_test.cc b/test/unit/search/search_query_test.cc similarity index 68% rename from test/unit/search/search_test.cc rename to test/unit/search/search_query_test.cc index ecd73616..6c8d80a0 100644 --- a/test/unit/search/search_test.cc +++ b/test/unit/search/search_query_test.cc @@ -4,7 +4,8 @@ #include -#include // std::string +#include // std::uint8_t, std::uint32_t +#include // std::memcpy #include // std::move #include // std::vector @@ -15,31 +16,20 @@ EXPECT_EQ((result).at(index).at("description").to_string(), \ (expected_description)); -TEST(Search, make_search_empty) { - std::vector entries; - const auto payload{sourcemeta::one::make_search(std::move(entries))}; - EXPECT_TRUE(payload.empty()); -} - -TEST(Search, make_search_single_entry) { - std::vector entries{ - {"/foo/bar", "My Title", "A description", 80}}; - const auto payload{sourcemeta::one::make_search(std::move(entries))}; - EXPECT_FALSE(payload.empty()); - - const std::string payload_string(payload.begin(), payload.end()); - EXPECT_NE(payload_string.find("/foo/bar"), std::string::npos); - EXPECT_NE(payload_string.find("My Title"), std::string::npos); - EXPECT_NE(payload_string.find("A description"), std::string::npos); +TEST(Search_query, empty_payload_nullptr) { + const auto result{sourcemeta::one::search(nullptr, 0, "anything")}; + EXPECT_TRUE(result.is_array()); + EXPECT_EQ(result.size(), 0); } -TEST(Search, search_empty_payload) { - const auto result{sourcemeta::one::search(nullptr, 0, "anything")}; +TEST(Search_query, empty_payload_zero_size) { + const std::uint8_t byte{0}; + const auto result{sourcemeta::one::search(&byte, 0, "anything")}; EXPECT_TRUE(result.is_array()); EXPECT_EQ(result.size(), 0); } -TEST(Search, search_no_match) { +TEST(Search_query, no_match) { std::vector entries{ {"/foo/bar", "Title", "Desc", 80}}; const auto payload{sourcemeta::one::make_search(std::move(entries))}; @@ -49,7 +39,7 @@ TEST(Search, search_no_match) { EXPECT_EQ(result.size(), 0); } -TEST(Search, search_match_in_path) { +TEST(Search_query, match_in_path) { std::vector entries{ {"/foo/bar", "Title", "Desc", 80}}; const auto payload{sourcemeta::one::make_search(std::move(entries))}; @@ -59,7 +49,7 @@ TEST(Search, search_match_in_path) { EXPECT_SEARCH_RESULT(result, 0, "/foo/bar", "Title", "Desc"); } -TEST(Search, search_match_in_title) { +TEST(Search_query, match_in_title) { std::vector entries{ {"/foo/bar", "Special Title", "Desc", 80}}; const auto payload{sourcemeta::one::make_search(std::move(entries))}; @@ -69,7 +59,7 @@ TEST(Search, search_match_in_title) { EXPECT_SEARCH_RESULT(result, 0, "/foo/bar", "Special Title", "Desc"); } -TEST(Search, search_match_in_description) { +TEST(Search_query, match_in_description) { std::vector entries{ {"/foo/bar", "Title", "Unique description here", 80}}; const auto payload{sourcemeta::one::make_search(std::move(entries))}; @@ -80,7 +70,7 @@ TEST(Search, search_match_in_description) { "Unique description here"); } -TEST(Search, search_case_insensitive) { +TEST(Search_query, case_insensitive) { std::vector entries_lower{ {"/foo/bar", "Hello World", "desc", 80}}; const auto payload_lower{ @@ -109,7 +99,7 @@ TEST(Search, search_case_insensitive) { EXPECT_SEARCH_RESULT(result_mixed, 0, "/foo/bar", "Hello World", "desc"); } -TEST(Search, search_multiple_matches) { +TEST(Search_query, multiple_matches) { std::vector entries{ {"/schemas/address", "Address Schema", "For addresses", 80}, {"/schemas/person", "Person Schema", "For people", 80}, @@ -126,7 +116,7 @@ TEST(Search, search_multiple_matches) { "For people"); } -TEST(Search, search_limit_10) { +TEST(Search_query, limit_10) { std::vector entries{ {"/schemas/test0", "Test 0", "", 80}, {"/schemas/test1", "Test 1", "", 80}, @@ -160,7 +150,7 @@ TEST(Search, search_limit_10) { EXPECT_SEARCH_RESULT(result, 9, "/schemas/test4", "Test 4", ""); } -TEST(Search, search_round_trip_data_fidelity) { +TEST(Search_query, round_trip_data_fidelity) { std::vector entries{ {"/a/b/c", "My Title", "My Description", 80}, {"/x/y/z", "", "Only description", 80}, @@ -174,7 +164,7 @@ TEST(Search, search_round_trip_data_fidelity) { EXPECT_SEARCH_RESULT(result, 2, "/x/y/z", "", "Only description"); } -TEST(Search, search_single_entry_match) { +TEST(Search_query, single_entry_match) { std::vector entries{ {"/only", "One", "Entry", 80}}; const auto payload{sourcemeta::one::make_search(std::move(entries))}; @@ -184,7 +174,7 @@ TEST(Search, search_single_entry_match) { EXPECT_SEARCH_RESULT(result, 0, "/only", "One", "Entry"); } -TEST(Search, search_single_entry_no_match) { +TEST(Search_query, single_entry_no_match) { std::vector entries{ {"/only", "One", "Entry", 80}}; const auto payload{sourcemeta::one::make_search(std::move(entries))}; @@ -193,7 +183,7 @@ TEST(Search, search_single_entry_no_match) { EXPECT_EQ(result.size(), 0); } -TEST(Search, search_empty_title_and_description) { +TEST(Search_query, empty_title_and_description) { std::vector entries{{"/path/only", "", "", 80}}; const auto payload{sourcemeta::one::make_search(std::move(entries))}; const auto result{ @@ -202,7 +192,7 @@ TEST(Search, search_empty_title_and_description) { EXPECT_SEARCH_RESULT(result, 0, "/path/only", "", ""); } -TEST(Search, search_health_higher_scores_first) { +TEST(Search_query, health_higher_scores_first) { std::vector entries{ {"/schemas/low", "Low Health", "Desc", 20}, {"/schemas/high", "High Health", "Desc", 100}, @@ -216,7 +206,7 @@ TEST(Search, search_health_higher_scores_first) { EXPECT_SEARCH_RESULT(result, 2, "/schemas/low", "Low Health", "Desc"); } -TEST(Search, search_health_100_before_50) { +TEST(Search_query, health_100_before_50) { std::vector entries{ {"/schemas/beta", "Beta", "Desc", 50}, {"/schemas/alpha", "Alpha", "Desc", 100}}; @@ -228,7 +218,7 @@ TEST(Search, search_health_100_before_50) { EXPECT_SEARCH_RESULT(result, 1, "/schemas/beta", "Beta", "Desc"); } -TEST(Search, search_health_0_ranks_last) { +TEST(Search_query, health_0_ranks_last) { std::vector entries{ {"/schemas/zero", "Zero", "Desc", 0}, {"/schemas/perfect", "Perfect", "Desc", 100}, @@ -242,7 +232,7 @@ TEST(Search, search_health_0_ranks_last) { EXPECT_SEARCH_RESULT(result, 2, "/schemas/zero", "Zero", "Desc"); } -TEST(Search, search_health_same_score_sorts_by_path) { +TEST(Search_query, health_same_score_sorts_by_path) { std::vector entries{ {"/schemas/zebra", "Zebra", "Desc", 75}, {"/schemas/apple", "Apple", "Desc", 75}, @@ -256,7 +246,7 @@ TEST(Search, search_health_same_score_sorts_by_path) { EXPECT_SEARCH_RESULT(result, 2, "/schemas/zebra", "Zebra", "Desc"); } -TEST(Search, search_metadata_score_beats_health) { +TEST(Search_query, metadata_score_beats_health) { std::vector entries{ {"/schemas/healthy", "", "", 100}, {"/schemas/complete", "Title", "Description", 30}}; @@ -268,7 +258,7 @@ TEST(Search, search_metadata_score_beats_health) { EXPECT_SEARCH_RESULT(result, 1, "/schemas/healthy", "", ""); } -TEST(Search, search_metadata_score_beats_health_title_only) { +TEST(Search_query, metadata_score_beats_health_title_only) { std::vector entries{ {"/schemas/no-meta", "", "", 100}, {"/schemas/has-title", "A Title", "", 10}}; @@ -280,7 +270,7 @@ TEST(Search, search_metadata_score_beats_health_title_only) { EXPECT_SEARCH_RESULT(result, 1, "/schemas/no-meta", "", ""); } -TEST(Search, search_health_tiebreaker_within_same_metadata) { +TEST(Search_query, health_tiebreaker_within_same_metadata) { std::vector entries{ {"/schemas/low-health", "Title", "", 25}, {"/schemas/high-health", "Title", "", 90}, @@ -294,7 +284,7 @@ TEST(Search, search_health_tiebreaker_within_same_metadata) { EXPECT_SEARCH_RESULT(result, 2, "/schemas/low-health", "Title", ""); } -TEST(Search, search_health_fine_grained_ordering) { +TEST(Search_query, health_fine_grained_ordering) { std::vector entries{ {"/schemas/d", "Title", "Desc", 70}, {"/schemas/a", "Title", "Desc", 100}, @@ -312,7 +302,7 @@ TEST(Search, search_health_fine_grained_ordering) { EXPECT_SEARCH_RESULT(result, 4, "/schemas/e", "Title", "Desc"); } -TEST(Search, search_health_mixed_metadata_and_health) { +TEST(Search_query, health_mixed_metadata_and_health) { std::vector entries{ {"/schemas/full-low", "Title", "Desc", 30}, {"/schemas/title-high", "Title", "", 95}, @@ -329,3 +319,120 @@ TEST(Search, search_health_mixed_metadata_and_health) { EXPECT_SEARCH_RESULT(result, 3, "/schemas/title-low", "Title", ""); EXPECT_SEARCH_RESULT(result, 4, "/schemas/none-perfect", "", ""); } + +TEST(Search_query, invalid_payload_too_small_for_header) { + const std::vector garbage{0x01, 0x02, 0x03}; + const auto result{ + sourcemeta::one::search(garbage.data(), garbage.size(), "test")}; + EXPECT_TRUE(result.is_array()); + EXPECT_EQ(result.size(), 0); +} + +TEST(Search_query, invalid_payload_header_claims_too_many_entries) { + sourcemeta::one::SearchIndexHeader header{}; + header.entry_count = 1000; + header.records_offset = + sizeof(sourcemeta::one::SearchIndexHeader) + 1000 * sizeof(std::uint32_t); + std::vector payload(sizeof(sourcemeta::one::SearchIndexHeader)); + std::memcpy(payload.data(), &header, + sizeof(sourcemeta::one::SearchIndexHeader)); + const auto result{ + sourcemeta::one::search(payload.data(), payload.size(), "test")}; + EXPECT_TRUE(result.is_array()); + EXPECT_EQ(result.size(), 0); +} + +TEST(Search_query, invalid_payload_offset_points_beyond_payload) { + sourcemeta::one::SearchIndexHeader header{}; + header.entry_count = 1; + header.records_offset = + sizeof(sourcemeta::one::SearchIndexHeader) + sizeof(std::uint32_t); + std::vector payload(sizeof(sourcemeta::one::SearchIndexHeader) + + sizeof(std::uint32_t)); + std::memcpy(payload.data(), &header, + sizeof(sourcemeta::one::SearchIndexHeader)); + const std::uint32_t bad_offset{99999}; + std::memcpy(payload.data() + sizeof(sourcemeta::one::SearchIndexHeader), + &bad_offset, sizeof(std::uint32_t)); + const auto result{ + sourcemeta::one::search(payload.data(), payload.size(), "test")}; + EXPECT_TRUE(result.is_array()); + EXPECT_EQ(result.size(), 0); +} + +TEST(Search_query, invalid_payload_record_field_lengths_exceed_payload) { + std::vector entries{ + {"/foo", "Title", "Desc", 80}}; + auto payload{sourcemeta::one::make_search(std::move(entries))}; + + sourcemeta::one::SearchIndexHeader header{}; + std::memcpy(&header, payload.data(), + sizeof(sourcemeta::one::SearchIndexHeader)); + + sourcemeta::one::SearchRecordHeader bad_record{}; + bad_record.path_length = 60000; + bad_record.title_length = 60000; + bad_record.description_length = 60000; + std::memcpy(payload.data() + header.records_offset, &bad_record, + sizeof(sourcemeta::one::SearchRecordHeader)); + + const auto result{ + sourcemeta::one::search(payload.data(), payload.size(), "test")}; + EXPECT_TRUE(result.is_array()); + EXPECT_EQ(result.size(), 0); +} + +TEST(Search_query, invalid_payload_zero_entry_count) { + sourcemeta::one::SearchIndexHeader header{}; + header.entry_count = 0; + header.records_offset = sizeof(sourcemeta::one::SearchIndexHeader); + std::vector payload(sizeof(sourcemeta::one::SearchIndexHeader)); + std::memcpy(payload.data(), &header, + sizeof(sourcemeta::one::SearchIndexHeader)); + const auto result{ + sourcemeta::one::search(payload.data(), payload.size(), "test")}; + EXPECT_TRUE(result.is_array()); + EXPECT_EQ(result.size(), 0); +} + +TEST(Search_query, invalid_payload_all_zeros) { + const std::vector payload(64, 0); + const auto result{ + sourcemeta::one::search(payload.data(), payload.size(), "test")}; + EXPECT_TRUE(result.is_array()); + EXPECT_EQ(result.size(), 0); +} + +TEST(Search_query, invalid_payload_random_garbage) { + const std::vector payload{0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA, + 0xF9, 0xF8, 0xF7, 0xF6, 0xF5, 0xF4, + 0xF3, 0xF2, 0xF1, 0xF0}; + const auto result{ + sourcemeta::one::search(payload.data(), payload.size(), "test")}; + EXPECT_TRUE(result.is_array()); + EXPECT_EQ(result.size(), 0); +} + +TEST(Search_query, invalid_payload_truncated_after_header) { + std::vector entries{ + {"/foo", "Title", "Desc", 80}}; + const auto full_payload{sourcemeta::one::make_search(std::move(entries))}; + const auto truncated_size{sizeof(sourcemeta::one::SearchIndexHeader)}; + const auto result{ + sourcemeta::one::search(full_payload.data(), truncated_size, "foo")}; + EXPECT_TRUE(result.is_array()); + EXPECT_EQ(result.size(), 0); +} + +TEST(Search_query, invalid_payload_truncated_mid_record) { + std::vector entries{ + {"/foo", "Title", "Desc", 80}}; + const auto full_payload{sourcemeta::one::make_search(std::move(entries))}; + const auto truncated_size{sizeof(sourcemeta::one::SearchIndexHeader) + + sizeof(std::uint32_t) + + sizeof(sourcemeta::one::SearchRecordHeader) + 2}; + const auto result{ + sourcemeta::one::search(full_payload.data(), truncated_size, "foo")}; + EXPECT_TRUE(result.is_array()); + EXPECT_EQ(result.size(), 0); +}