Skip to content

Commit b34ebb6

Browse files
[#29388] docdb: Add /snapshots endpoint to tserver UI
Summary: This diff adds a `/snapshots` endpoint to the tserver UI. This endpoint displays information about the space overhead of snapshots. The endpoint has a table for each namespace. Each table has a row for each snapshot in the schedule (in reverse chronological order), and a special row at the top representing the active RocksDB files. Each row contains:
- The snapshot ID
- The time the snapshot was taken (UTC)
- The cumulative space required to store this snapshot, newer snapshots, and active RocksDB. It also has the incremental size in parentheses.
- The space required to store the snapshot's exclusive data (i.e., not shared with any other snapshot or active RocksDB)
- The schedule ID, if the snapshot was taken as part of a schedule

For example, suppose a user has a snapshot schedule `foo` with a retention of 3 hours and an interval of 1 hour, and the data in the snapshots is as follows:
- Active RocksDB: 10.sst (40 GB), 11.sst (20 GB), 12.sst (10 GB)
- s3 (10 AM): 8.sst (2 GB), 9.sst (5 GB), 10.sst (40 GB)
- s2 (9 AM): 8.sst (2 GB), 9.sst (5 GB)
- s1 (8 AM): 7.sst (1 GB), 8.sst (2 GB)

The table would display:
| Snapshot ID | Snapshot Time (UTC) | Cumulative Size (+ Incremental size) | Exclusive Size | Schedule ID |
| Active RocksDB | N/A | 70G | 30G | N/A |
| s3 | 10 AM | 77G (+ 7G) | 0B | foo |
| s2 | 9 AM | 77G (+ 0B) | 0B | foo |
| s1 | 8 AM | 78G (+ 1G) | 1G | foo |

This diff also moved some useful test functions from `master_path_handlers-itest.cc` to a new utility file, `path_handlers_util.cc`, so that they could be used for the new endpoint's test.

Test Plan: Ran `CassandraBatchKeyValue` against a cluster on my dev-server, created a snapshot schedule, and visited the tserver UI at `127.0.0.1:9000/snapshots`.
```
java -jar ../yb-sample-apps.jar --workload CassandraBatchKeyValue --nodes 127.0.0.1:9042 --num_unique_keys 200000000000 --num_reads 1500000 --num_writes 200000000000 --num_threads_read 0 --num_threads_write 24
./build/latest/bin/yb-admin create_snapshot_schedule 5 15 ycql.ybdemo_keyspace
```
I ran the following command to compact the table between snapshots:
```
./build/latest/bin/yb-admin --master_addresses 127.0.0.1:7100 compact_table ycql.ybdemo_keyspace cassandrakeyvalue
```
The output of the page was: {F421691}
With an unscheduled snapshot, the output looks like this (snapshot `028da231-5fe8-41eb-b628-3ee9a2e41475` was unscheduled): {F422159}
Also created a basic unit test to verify that the endpoint works: `./yb_build.sh release --cxx-test integration-tests_tserver_path_handlers-itest --gtest_filter=TServerPathHandlersItest.TestSnapshotsEndpoint`
Reviewers: mhaddad, zdrudi
Reviewed By: mhaddad, zdrudi
Subscribers: hsunder, kannan, ybase
Differential Revision: https://phorge.dev.yugabyte.com/D48482
1 parent 1278808 commit b34ebb6

16 files changed

+523
-114
lines changed

src/yb/integration-tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ set(INTEGRATION_TESTS_SRCS
9494
mini_cluster_base.cc
9595
mini_cluster_utils.cc
9696
packed_row_test_base.cc
97+
path_handlers_util.cc
9798
postgres-minicluster.cc
9899
redis_table_test_base.cc
99100
test_workload.cc

src/yb/integration-tests/master_path_handlers-itest.cc

Lines changed: 35 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "yb/integration-tests/cluster_itest_util.h"
3636
#include "yb/integration-tests/external_mini_cluster.h"
3737
#include "yb/integration-tests/mini_cluster.h"
38+
#include "yb/integration-tests/path_handlers_util.h"
3839
#include "yb/integration-tests/yb_mini_cluster_test_base.h"
3940

4041
#include "yb/master/catalog_entity_info.h"
@@ -88,7 +89,7 @@ DECLARE_int32(TEST_sleep_before_reporting_lb_ui_ms);
8889
DECLARE_bool(ysql_enable_auto_analyze_infra);
8990
DECLARE_int32(tablet_overhead_size_percentage);
9091

91-
namespace yb::master {
92+
namespace yb::integration_tests {
9293

9394
using std::string;
9495
using std::vector;
@@ -124,21 +125,20 @@ class MasterPathHandlersBaseItest : public YBMiniClusterTestBase<T> {
124125
}
125126

126127
protected:
127-
// Attempts to fetch url until a response with status OK, or until timeout.
128-
// On mac the curl command fails with error "A libcurl function was given a bad argument", but
129-
// succeeds on retries.
130-
Status GetUrl(const string& query_path, faststring* result, MonoDelta timeout = 30s) {
131-
const string url = master_http_url_ + query_path;
132-
Status status;
133-
return WaitFor(
134-
[&]() -> bool {
135-
EasyCurl curl;
136-
status = curl.FetchURL(url, result);
137-
YB_LOG_IF_EVERY_N(WARNING, !status.ok(), 5) << status;
138-
139-
return status.ok();
140-
},
141-
timeout, "Wait for curl response to return with status OK");
128+
Status GetUrl(const std::string& query_path, faststring* result) {
129+
return path_handlers_util::GetUrl(master_http_url_ + query_path, result);
130+
}
131+
132+
Result<std::vector<std::vector<std::string>>> GetHtmlTableRows(
133+
const std::string& url, const std::string& html_table_tag_id) {
134+
return path_handlers_util::GetHtmlTableRows(master_http_url_ + url, html_table_tag_id);
135+
}
136+
137+
Result<std::vector<std::string>> GetHtmlTableColumn(
138+
const std::string& url, const std::string& html_table_tag_id,
139+
const std::string& column_header) {
140+
return path_handlers_util::GetHtmlTableColumn(
141+
master_http_url_ + url, html_table_tag_id, column_header);
142142
}
143143

144144
virtual int num_tablet_servers() const {
@@ -221,74 +221,6 @@ class MasterPathHandlersItest : public MasterPathHandlersBaseItest<MiniCluster>
221221
client_ = ASSERT_RESULT(cluster_->CreateClient());
222222
}
223223

224-
// Returns the rows in a table with a given id, excluding the header row.
225-
Result<std::vector<std::vector<std::string>>> GetHtmlTableRows(
226-
const std::string& url, const std::string& html_table_tag_id, bool include_header = false) {
227-
faststring result;
228-
RETURN_NOT_OK(GetUrl(url, &result));
229-
const auto webpage = result.ToString();
230-
// Using [^]* to matches all characters instead of .* because . does not match newlines.
231-
const std::regex table_regex(
232-
Format("<table[^>]*id='$0'[^>]*>([^]*?)</table>", html_table_tag_id));
233-
const std::regex row_regex(Format("<tr>([^]*?)</tr>"));
234-
const std::regex col_regex(Format("<td[^>]*>([^]*?)</td>"));
235-
const std::regex header_regex("<th[^>]*>([^>]*?)</th>");
236-
237-
std::smatch match;
238-
std::regex_search(webpage, match, table_regex);
239-
240-
// [0] is the full match.
241-
if (match.size() < 1) {
242-
LOG(INFO) << "Full webpage: " << webpage;
243-
return STATUS_FORMAT(NotFound, "Table with id $0 not found", html_table_tag_id);
244-
}
245-
// Match[1] is the first capture group, and contains everything inside the <table> tags.
246-
std::string table = match[1];
247-
248-
std::vector<std::vector<std::string>> rows;
249-
// Start at the second row to skip the header.
250-
auto table_begin = std::sregex_iterator(table.begin(), table.end(), row_regex);
251-
if (!include_header) {
252-
++table_begin;
253-
}
254-
for (auto row_it = table_begin; row_it != std::sregex_iterator(); ++row_it) {
255-
auto row = row_it->str(1);
256-
std::vector<std::string> cols;
257-
std::regex regex;
258-
if (include_header && rows.empty()) {
259-
regex = header_regex;
260-
} else {
261-
regex = col_regex;
262-
}
263-
const auto row_begin = std::sregex_iterator(row.begin(), row.end(), regex);
264-
for (auto col_it = row_begin; col_it != std::sregex_iterator(); ++col_it) {
265-
cols.push_back(col_it->str(1));
266-
}
267-
rows.push_back(std::move(cols));
268-
}
269-
return rows;
270-
}
271-
272-
Result<std::vector<std::string>> GetHtmlTableColumn(
273-
const std::string& url, const std::string& html_table_tag_id,
274-
const std::string& column_header) {
275-
auto rows = VERIFY_RESULT(GetHtmlTableRows(url, html_table_tag_id, /* include_header= */ true));
276-
if (rows.size() < 1) {
277-
return STATUS_FORMAT(
278-
NotFound, "Couldn't find table at url $0 with tag id $1", url, html_table_tag_id);
279-
}
280-
auto it = std::find(rows[0].begin(), rows[0].end(), column_header);
281-
if (it == rows[0].end()) {
282-
return STATUS_FORMAT(
283-
NotFound, "Couldn't find column with header $0 at url $1 with tag id $2", column_header,
284-
url, html_table_tag_id);
285-
}
286-
auto col_idx = it - rows[0].begin();
287-
auto rng = rows | std::views::drop(1) |
288-
std::views::transform([&col_idx](const auto& row) { return row[col_idx]; });
289-
return std::vector(std::ranges::begin(rng), std::ranges::end(rng));
290-
}
291-
292224
void ExpectLoadDistributionViewTabletsShown(int tablet_count) {
293225
// This code expects that we have 3 TServers, 1 table, and RF 3.
294226
int expected_replicas = tablet_count * 3;
@@ -329,12 +261,12 @@ class MasterPathHandlersItest : public MasterPathHandlersBaseItest<MiniCluster>
329261
bool verifyTServersAlive(int n, const string& result) {
330262
size_t pos = 0;
331263
for (int i = 0; i < n; i++) {
332-
pos = result.find(kTserverAlive, pos + 1);
264+
pos = result.find(master::kTserverAlive, pos + 1);
333265
if (pos == string::npos) {
334266
return false;
335267
}
336268
}
337-
return result.find(kTserverAlive, pos + 1) == string::npos;
269+
return result.find(master::kTserverAlive, pos + 1) == string::npos;
338270
}
339271

340272
TEST_F(MasterPathHandlersItest, TestMasterPathHandlers) {
@@ -361,9 +293,9 @@ TEST_F(MasterPathHandlersItest, TestDeadTServers) {
361293
ASSERT_TRUE(verifyTServersAlive(2, result_str));
362294

363295
// Now verify dead.
364-
size_t pos = result_str.find(kTserverDead, 0);
296+
size_t pos = result_str.find(master::kTserverDead, 0);
365297
ASSERT_TRUE(pos != string::npos);
366-
ASSERT_TRUE(result_str.find(kTserverDead, pos + 1) == string::npos);
298+
ASSERT_TRUE(result_str.find(master::kTserverDead, pos + 1) == string::npos);
367299

368300
// Startup the tserver and wait for heartbeats.
369301
ASSERT_OK(cluster_->mini_tablet_server(0)->Start(tserver::WaitTabletsBootstrapped::kFalse));
@@ -384,7 +316,7 @@ TEST_F(MasterPathHandlersItest, TestTabletReplicationEndpoint) {
384316

385317
// Choose a tablet to orphan and take note of the servers which are leaders/followers for this
386318
// tablet.
387-
google::protobuf::RepeatedPtrField<TabletLocationsPB> tablets;
319+
google::protobuf::RepeatedPtrField<master::TabletLocationsPB> tablets;
388320
ASSERT_OK(client_->GetTabletsFromTableId(table->id(), kNumTablets, &tablets));
389321
std::vector<yb::tserver::MiniTabletServer *> followers;
390322
yb::tserver::MiniTabletServer* leader = nullptr;
@@ -572,7 +504,7 @@ TEST_F(MasterPathHandlersItest, TestTableJsonEndpointValidTableId) {
572504

573505
// Call endpoint and validate format of response.
574506
faststring result;
575-
ASSERT_OK(GetUrl(Format("/api/v1/table?id=$0", table->id()), &result, 30s /* timeout */));
507+
ASSERT_OK(GetUrl(Format("/api/v1/table?id=$0", table->id()), &result));
576508

577509
JsonReader r(result.ToString());
578510
ASSERT_OK(r.Init());
@@ -603,8 +535,8 @@ TEST_F(MasterPathHandlersItest, TestTableJsonEndpointValidTableName) {
603535
// Call endpoint and validate format of response.
604536
faststring result;
605537
ASSERT_OK(GetUrl(
606-
Format("/api/v1/table?keyspace_name=$0&table_name=$1", kKeyspaceName, "test_table"), &result,
607-
30s /* timeout */));
538+
Format("/api/v1/table?keyspace_name=$0&table_name=$1", kKeyspaceName, "test_table"),
539+
&result));
608540

609541
JsonReader r(result.ToString());
610542
ASSERT_OK(r.Init());
@@ -622,7 +554,7 @@ TEST_F(MasterPathHandlersItest, TestTableJsonEndpointInvalidTableId) {
622554

623555
// Call endpoint and validate format of response.
624556
faststring result;
625-
ASSERT_OK(GetUrl("/api/v1/table?id=12345", &result, 30s /* timeout */));
557+
ASSERT_OK(GetUrl("/api/v1/table?id=12345", &result));
626558

627559
JsonReader r(result.ToString());
628560
ASSERT_OK(r.Init());
@@ -638,7 +570,7 @@ TEST_F(MasterPathHandlersItest, TestTableJsonEndpointNoArgs) {
638570

639571
// Call endpoint and validate format of response.
640572
faststring result;
641-
ASSERT_OK(GetUrl("/api/v1/table", &result, 30s /* timeout */));
573+
ASSERT_OK(GetUrl("/api/v1/table", &result));
642574

643575
JsonReader r(result.ToString());
644576
ASSERT_OK(r.Init());
@@ -653,7 +585,7 @@ TEST_F(MasterPathHandlersItest, TestTablesJsonEndpoint) {
653585
auto table = CreateTestTable();
654586

655587
faststring result;
656-
ASSERT_OK(GetUrl("/api/v1/tables", &result, 30s /* timeout */));
588+
ASSERT_OK(GetUrl("/api/v1/tables", &result));
657589

658590
JsonReader r(result.ToString());
659591
ASSERT_OK(r.Init());
@@ -670,7 +602,7 @@ TEST_F(MasterPathHandlersItest, TestTablesJsonEndpoint) {
670602
const rapidjson::Value& table_obj = (*json_obj)["user"][0];
671603
EXPECT_EQ(kKeyspaceName, table_obj["keyspace"].GetString());
672604
EXPECT_EQ(table_name.table_name(), table_obj["table_name"].GetString());
673-
EXPECT_EQ(SysTablesEntryPB_State_Name(SysTablesEntryPB_State_RUNNING),
605+
EXPECT_EQ(SysTablesEntryPB_State_Name(master::SysTablesEntryPB_State_RUNNING),
674606
table_obj["state"].GetString());
675607
EXPECT_EQ(table_obj["message"].GetString(), string());
676608
EXPECT_EQ(table->id(), table_obj["uuid"].GetString());
@@ -701,7 +633,7 @@ TEST_F(MasterPathHandlersItest, TestMemTrackersJsonEndpoint) {
701633
auto table = CreateTestTable();
702634

703635
faststring result;
704-
ASSERT_OK(GetUrl("/api/v1/mem-trackers", &result, 30s /* timeout */));
636+
ASSERT_OK(GetUrl("/api/v1/mem-trackers", &result));
705637

706638
JsonReader r(result.ToString());
707639
ASSERT_OK(r.Init());
@@ -770,8 +702,7 @@ TEST_F_EX(MasterPathHandlersItest, ShowDeletedTablets, TabletSplitMasterPathHand
770702
[this, &table](const bool should_show_deleted) -> Result<bool> {
771703
faststring result;
772704
RETURN_NOT_OK(GetUrl(
773-
"/table?id=" + table->id() + (should_show_deleted ? "&show_deleted" : ""), &result,
774-
30s /* timeout */));
705+
"/table?id=" + table->id() + (should_show_deleted ? "&show_deleted" : ""), &result));
775706
const auto webpage = result.ToString();
776707
std::smatch match;
777708
const std::regex regex(
@@ -1509,7 +1440,7 @@ TEST_F(MasterPathHandlersItest, TestLeaderlessDeletedTablet) {
15091440
MonoTime last_time_with_valid_leader_override = MonoTime::Now();
15101441
last_time_with_valid_leader_override.SubtractDelta(kLeaderlessTabletAlertDelaySecs * 1s);
15111442
for (auto& tablet : tablets) {
1512-
auto replicas = std::make_shared<TabletReplicaMap>(*tablet->GetReplicaLocations());
1443+
auto replicas = std::make_shared<master::TabletReplicaMap>(*tablet->GetReplicaLocations());
15131444
for (auto& replica : *replicas) {
15141445
replica.second.role = PeerRole::FOLLOWER;
15151446
}
@@ -1521,11 +1452,11 @@ TEST_F(MasterPathHandlersItest, TestLeaderlessDeletedTablet) {
15211452
auto replaced_tablet = tablets[2];
15221453

15231454
auto deleted_lock = deleted_tablet->LockForWrite();
1524-
deleted_lock.mutable_data()->set_state(SysTabletsEntryPB::DELETED, "");
1455+
deleted_lock.mutable_data()->set_state(master::SysTabletsEntryPB::DELETED, "");
15251456
deleted_lock.Commit();
15261457

15271458
auto replaced_lock = replaced_tablet->LockForWrite();
1528-
replaced_lock.mutable_data()->set_state(SysTabletsEntryPB::REPLACED, "");
1459+
replaced_lock.mutable_data()->set_state(master::SysTabletsEntryPB::REPLACED, "");
15291460
replaced_lock.Commit();
15301461

15311462
// Only the RUNNING tablet should be returned in the endpoint.
@@ -1962,7 +1893,7 @@ TEST_F(MasterPathHandlersItest, TabletLimitsSkipBlacklistedTServers) {
19621893
auto original_value = std::stoll(cols[0]);
19631894

19641895
auto ts = cluster_->mini_tablet_server(0);
1965-
auto cluster_client = MasterClusterClient(
1896+
auto cluster_client = master::MasterClusterClient(
19661897
ASSERT_RESULT(cluster_->GetLeaderMasterProxy<master::MasterClusterProxy>()));
19671898
for (const auto& hp : ts->options()->broadcast_addresses) {
19681899
ASSERT_OK(cluster_client.BlacklistHost(hp.ToPB<HostPortPB>()));
@@ -1978,4 +1909,4 @@ TEST_F(MasterPathHandlersItest, TabletLimitsSkipBlacklistedTServers) {
19781909
10s, "Reported tablet limit should decrease"));
19791910
}
19801911

1981-
} // namespace yb::master
1912+
} // namespace yb::integration_tests
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// Copyright (c) YugabyteDB, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4+
// in compliance with the License. You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software distributed under the License
9+
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10+
// or implied. See the License for the specific language governing permissions and limitations
11+
// under the License.
12+
//
13+
14+
#include <regex>
15+
#include <string>
16+
#include <vector>
17+
18+
#include "yb/integration-tests/path_handlers_util.h"
19+
20+
#include "yb/util/backoff_waiter.h"
21+
#include "yb/util/curl_util.h"
22+
23+
namespace yb::integration_tests::path_handlers_util {
24+
25+
// Attempts to fetch url until a response with status OK, or until timeout.
26+
// On mac the curl command fails with error "A libcurl function was given a bad argument", but
27+
// succeeds on retries.
28+
Status GetUrl(const std::string& url, faststring* result, MonoDelta timeout) {
29+
Status status;
30+
return WaitFor(
31+
[&]() -> bool {
32+
EasyCurl curl;
33+
status = curl.FetchURL(url, result);
34+
YB_LOG_IF_EVERY_N(WARNING, !status.ok(), 5) << status;
35+
36+
return status.ok();
37+
},
38+
timeout, "Wait for curl response to return with status OK");
39+
}
40+
41+
// Returns the rows in a table with a given id, excluding the header row.
42+
Result<std::vector<std::vector<std::string>>> GetHtmlTableRows(
43+
const std::string& url, const std::string& html_table_tag_id, bool include_header) {
44+
faststring result;
45+
RETURN_NOT_OK(GetUrl(url, &result));
46+
const auto webpage = result.ToString();
47+
// Using [^]* to matches all characters instead of .* because . does not match newlines.
48+
const std::regex table_regex(
49+
Format("<table[^>]*id='$0'[^>]*>([^]*?)</table>", html_table_tag_id));
50+
const std::regex row_regex(Format("<tr>([^]*?)</tr>"));
51+
const std::regex col_regex(Format("<td[^>]*>([^]*?)</td>"));
52+
const std::regex header_regex("<th[^>]*>([^>]*?)</th>");
53+
54+
std::smatch match;
55+
std::regex_search(webpage, match, table_regex);
56+
57+
// [0] is the full match.
58+
if (match.size() < 1) {
59+
LOG(INFO) << "Full webpage: " << webpage;
60+
return STATUS_FORMAT(NotFound, "Table with id $0 not found", html_table_tag_id);
61+
}
62+
// Match[1] is the first capture group, and contains everything inside the <table> tags.
63+
std::string table = match[1];
64+
65+
std::vector<std::vector<std::string>> rows;
66+
// Start at the second row to skip the header.
67+
auto table_begin = std::sregex_iterator(table.begin(), table.end(), row_regex);
68+
if (!include_header) {
69+
++table_begin;
70+
}
71+
for (auto row_it = table_begin; row_it != std::sregex_iterator(); ++row_it) {
72+
auto row = row_it->str(1);
73+
std::vector<std::string> cols;
74+
std::regex regex;
75+
if (include_header && rows.empty()) {
76+
regex = header_regex;
77+
} else {
78+
regex = col_regex;
79+
}
80+
const auto row_begin = std::sregex_iterator(row.begin(), row.end(), regex);
81+
for (auto col_it = row_begin; col_it != std::sregex_iterator(); ++col_it) {
82+
cols.push_back(col_it->str(1));
83+
}
84+
rows.push_back(std::move(cols));
85+
}
86+
return rows;
87+
}
88+
89+
// Fetches `url` and returns, for every non-header row of the HTML table with id
// `html_table_tag_id`, the cell located under the header cell whose text equals `column_header`.
//
// Returns NotFound if the table has no rows, if no header cell matches `column_header`, or if a
// data row has too few cells to contain the requested column. Errors from GetHtmlTableRows are
// propagated unchanged.
Result<std::vector<std::string>> GetHtmlTableColumn(
    const std::string& url, const std::string& html_table_tag_id,
    const std::string& column_header) {
  auto rows = VERIFY_RESULT(GetHtmlTableRows(url, html_table_tag_id, /* include_header= */ true));
  if (rows.empty()) {
    return STATUS_FORMAT(
        NotFound, "Couldn't find table at url $0 with tag id $1", url, html_table_tag_id);
  }
  const auto& header = rows.front();
  const auto it = std::find(header.begin(), header.end(), column_header);
  if (it == header.end()) {
    return STATUS_FORMAT(
        NotFound, "Couldn't find column with header $0 at url $1 with tag id $2", column_header,
        url, html_table_tag_id);
  }
  const auto col_idx = static_cast<size_t>(it - header.begin());

  std::vector<std::string> column;
  column.reserve(rows.size() - 1);
  // Row 0 is the header; the column values start at row 1.
  for (size_t row_idx = 1; row_idx < rows.size(); ++row_idx) {
    auto& row = rows[row_idx];
    // A row may have fewer <td> cells than the header has <th> cells; indexing it blindly would
    // read out of bounds.
    if (col_idx >= row.size()) {
      return STATUS_FORMAT(
          NotFound, "Row $0 has only $1 cells, so it has no column $2 at url $3 with tag id $4",
          row_idx, row.size(), column_header, url, html_table_tag_id);
    }
    // `rows` is a local that is discarded on return, so the cell can be moved out.
    column.push_back(std::move(row[col_idx]));
  }
  return column;
}
108+
109+
} // namespace yb::integration_tests::path_handlers_util

0 commit comments

Comments
 (0)