Skip to content

Commit 768c659

Browse files
committed
feat: introduce metrics core packages
1 parent ae98e6f commit 768c659

File tree

3 files changed

+374
-0
lines changed

3 files changed

+374
-0
lines changed

internal/metrics/prometheus.go

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
package metrics
2+
3+
import (
4+
"github.com/prometheus/client_golang/prometheus"
5+
"github.com/prometheus/client_golang/prometheus/promauto"
6+
)
7+
8+
// Metrics holds all Prometheus metrics
9+
type Metrics struct {
10+
// Ingestion metrics
11+
IngestEventsTotal *prometheus.CounterVec
12+
IngestErrorsTotal *prometheus.CounterVec
13+
IngestDuration *prometheus.HistogramVec
14+
BufferSize prometheus.Gauge
15+
16+
// Storage metrics
17+
StorageWritesTotal *prometheus.CounterVec
18+
StorageErrorsTotal *prometheus.CounterVec
19+
StorageDuration *prometheus.HistogramVec
20+
21+
// API metrics
22+
APIRequestsTotal *prometheus.CounterVec
23+
APIRequestDuration *prometheus.HistogramVec
24+
APIErrorsTotal *prometheus.CounterVec
25+
26+
// MCP metrics (from stored data)
27+
MCPToolCallsTotal *prometheus.CounterVec
28+
MCPToolDuration *prometheus.HistogramVec
29+
MCPToolErrorsTotal *prometheus.CounterVec
30+
MCPSessionsActive *prometheus.GaugeVec
31+
MCPSessionsTotal *prometheus.CounterVec
32+
}
33+
34+
// NewMetrics creates new Prometheus metrics
35+
func NewMetrics() *Metrics {
36+
return &Metrics{
37+
// Ingestion metrics
38+
IngestEventsTotal: promauto.NewCounterVec(
39+
prometheus.CounterOpts{
40+
Name: "mcpulse_ingest_events_total",
41+
Help: "Total number of events ingested",
42+
},
43+
[]string{"type", "status"},
44+
),
45+
IngestErrorsTotal: promauto.NewCounterVec(
46+
prometheus.CounterOpts{
47+
Name: "mcpulse_ingest_errors_total",
48+
Help: "Total number of ingestion errors",
49+
},
50+
[]string{"error_type"},
51+
),
52+
IngestDuration: promauto.NewHistogramVec(
53+
prometheus.HistogramOpts{
54+
Name: "mcpulse_ingest_duration_seconds",
55+
Help: "Duration of ingestion operations",
56+
Buckets: prometheus.DefBuckets,
57+
},
58+
[]string{"operation"},
59+
),
60+
BufferSize: promauto.NewGauge(
61+
prometheus.GaugeOpts{
62+
Name: "mcpulse_buffer_size",
63+
Help: "Current size of the ingestion buffer",
64+
},
65+
),
66+
67+
// Storage metrics
68+
StorageWritesTotal: promauto.NewCounterVec(
69+
prometheus.CounterOpts{
70+
Name: "mcpulse_storage_writes_total",
71+
Help: "Total number of storage writes",
72+
},
73+
[]string{"table"},
74+
),
75+
StorageErrorsTotal: promauto.NewCounterVec(
76+
prometheus.CounterOpts{
77+
Name: "mcpulse_storage_errors_total",
78+
Help: "Total number of storage errors",
79+
},
80+
[]string{"table", "error_type"},
81+
),
82+
StorageDuration: promauto.NewHistogramVec(
83+
prometheus.HistogramOpts{
84+
Name: "mcpulse_storage_duration_seconds",
85+
Help: "Duration of storage operations",
86+
Buckets: prometheus.DefBuckets,
87+
},
88+
[]string{"operation"},
89+
),
90+
91+
// API metrics
92+
APIRequestsTotal: promauto.NewCounterVec(
93+
prometheus.CounterOpts{
94+
Name: "mcpulse_api_requests_total",
95+
Help: "Total number of API requests",
96+
},
97+
[]string{"method", "path", "status"},
98+
),
99+
APIRequestDuration: promauto.NewHistogramVec(
100+
prometheus.HistogramOpts{
101+
Name: "mcpulse_api_request_duration_seconds",
102+
Help: "Duration of API requests",
103+
Buckets: prometheus.DefBuckets,
104+
},
105+
[]string{"method", "path"},
106+
),
107+
APIErrorsTotal: promauto.NewCounterVec(
108+
prometheus.CounterOpts{
109+
Name: "mcpulse_api_errors_total",
110+
Help: "Total number of API errors",
111+
},
112+
[]string{"method", "path", "error_type"},
113+
),
114+
115+
// MCP metrics
116+
MCPToolCallsTotal: promauto.NewCounterVec(
117+
prometheus.CounterOpts{
118+
Name: "mcp_tool_calls_total",
119+
Help: "Total number of MCP tool calls",
120+
},
121+
[]string{"server_id", "tool_name", "status"},
122+
),
123+
MCPToolDuration: promauto.NewHistogramVec(
124+
prometheus.HistogramOpts{
125+
Name: "mcp_tool_duration_seconds",
126+
Help: "Duration of MCP tool calls",
127+
Buckets: []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
128+
},
129+
[]string{"server_id", "tool_name"},
130+
),
131+
MCPToolErrorsTotal: promauto.NewCounterVec(
132+
prometheus.CounterOpts{
133+
Name: "mcp_tool_errors_total",
134+
Help: "Total number of MCP tool errors",
135+
},
136+
[]string{"server_id", "tool_name", "error_type"},
137+
),
138+
MCPSessionsActive: promauto.NewGaugeVec(
139+
prometheus.GaugeOpts{
140+
Name: "mcp_sessions_active",
141+
Help: "Number of active MCP sessions",
142+
},
143+
[]string{"server_id"},
144+
),
145+
MCPSessionsTotal: promauto.NewCounterVec(
146+
prometheus.CounterOpts{
147+
Name: "mcp_sessions_total",
148+
Help: "Total number of MCP sessions",
149+
},
150+
[]string{"server_id"},
151+
),
152+
}
153+
}
154+
155+
// RecordIngest records an ingestion event
156+
func (m *Metrics) RecordIngest(eventType string, success bool) {
157+
status := "success"
158+
if !success {
159+
status = "failed"
160+
}
161+
m.IngestEventsTotal.WithLabelValues(eventType, status).Inc()
162+
}
163+
164+
// RecordIngestError records an ingestion error
165+
func (m *Metrics) RecordIngestError(errorType string) {
166+
m.IngestErrorsTotal.WithLabelValues(errorType).Inc()
167+
}
168+
169+
// RecordStorageWrite records a storage write
170+
func (m *Metrics) RecordStorageWrite(table string) {
171+
m.StorageWritesTotal.WithLabelValues(table).Inc()
172+
}
173+
174+
// RecordStorageError records a storage error
175+
func (m *Metrics) RecordStorageError(table, errorType string) {
176+
m.StorageErrorsTotal.WithLabelValues(table, errorType).Inc()
177+
}
178+
179+
// UpdateBufferSize updates the buffer size gauge
180+
func (m *Metrics) UpdateBufferSize(size int) {
181+
m.BufferSize.Set(float64(size))
182+
}

internal/models/metrics.go

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
package models
2+
3+
import (
4+
"time"
5+
)
6+
7+
// ToolCallMetric represents a single tool call event.
//
// Pointer fields and fields tagged omitempty are left out of the JSON
// encoding when they are nil or empty.
type ToolCallMetric struct {
	ID              string                 `json:"id"`
	Timestamp       time.Time              `json:"timestamp"`
	ServerID        string                 `json:"server_id"`
	SessionID       *string                `json:"session_id,omitempty"`
	ToolName        string                 `json:"tool_name"`
	Parameters      map[string]interface{} `json:"parameters,omitempty"`
	DurationMS      int64                  `json:"duration_ms"`
	Status          string                 `json:"status"` // success, error, timeout
	ErrorMessage    *string                `json:"error_message,omitempty"`
	ErrorType       *string                `json:"error_type,omitempty"`
	ResultSize      *int                   `json:"result_size,omitempty"`
	ProtocolVersion string                 `json:"protocol_version,omitempty"`
	ClientName      string                 `json:"client_name,omitempty"`
	ClientVersion   string                 `json:"client_version,omitempty"`
}
24+
25+
// ResourceAccessMetric represents a resource access event.
//
// SessionID and Size are optional and omitted from JSON when nil.
type ResourceAccessMetric struct {
	ID          string    `json:"id"`
	Timestamp   time.Time `json:"timestamp"`
	ServerID    string    `json:"server_id"`
	SessionID   *string   `json:"session_id,omitempty"`
	ResourceURI string    `json:"resource_uri"`
	AccessType  string    `json:"access_type"` // read, subscribe
	DurationMS  int64     `json:"duration_ms"`
	CacheHit    bool      `json:"cache_hit"`
	Status      string    `json:"status"`
	Size        *int      `json:"size,omitempty"`
}
38+
39+
// SessionMetric represents a session lifecycle event.
//
// EndTime and DurationMS are pointers and are omitted from JSON when nil.
type SessionMetric struct {
	ID                    string     `json:"id"`
	ServerID              string     `json:"server_id"`
	StartTime             time.Time  `json:"start_time"`
	EndTime               *time.Time `json:"end_time,omitempty"`
	DurationMS            *int64     `json:"duration_ms,omitempty"`
	ToolCallCount         int        `json:"tool_call_count"`
	ResourceAccessCount   int        `json:"resource_access_count"`
	PromptInvocationCount int        `json:"prompt_invocation_count"`
	ProtocolVersion       string     `json:"protocol_version"`
	ClientName            string     `json:"client_name"`
	ClientVersion         string     `json:"client_version"`
}
53+
54+
// ServerInfo represents metadata about an MCP server.
//
// Description, Version and Metadata are omitted from JSON when empty.
type ServerInfo struct {
	ID          string                 `json:"id"`
	Name        string                 `json:"name"`
	Description string                 `json:"description,omitempty"`
	Version     string                 `json:"version,omitempty"`
	FirstSeen   time.Time              `json:"first_seen"`
	LastSeen    time.Time              `json:"last_seen"`
	Metadata    map[string]interface{} `json:"metadata,omitempty"`
}
64+
65+
// ToolStatistics represents aggregated tool statistics for one time window.
type ToolStatistics struct {
	ToolName      string    `json:"tool_name"`
	TimeWindow    time.Time `json:"time_window"`
	WindowSize    string    `json:"window_size"` // 1m, 5m, 1h, 1d
	CallCount     int64     `json:"call_count"`
	SuccessCount  int64     `json:"success_count"`
	ErrorCount    int64     `json:"error_count"`
	TimeoutCount  int64     `json:"timeout_count"`
	AvgDurationMS float64   `json:"avg_duration_ms"`
	P50DurationMS int64     `json:"p50_duration_ms"`
	P95DurationMS int64     `json:"p95_duration_ms"`
	P99DurationMS int64     `json:"p99_duration_ms"`
	MaxDurationMS int64     `json:"max_duration_ms"`
	MinDurationMS int64     `json:"min_duration_ms"`
}
81+
82+
// IngestRequest represents a batch of metrics to ingest
83+
type IngestRequest struct {
84+
Metrics []ToolCallMetric `json:"metrics"`
85+
}
86+
87+
// IngestResponse represents the response from an ingest request
88+
type IngestResponse struct {
89+
Accepted int `json:"accepted"`
90+
Rejected int `json:"rejected"`
91+
Errors []IngestError `json:"errors,omitempty"`
92+
}
93+
94+
// IngestError represents an error during ingestion, identified by an index
// and a human-readable reason.
// NOTE(review): Index presumably refers to the position of the rejected entry
// in the submitted batch; confirm against the ingest handler.
type IngestError struct {
	Index  int    `json:"index"`
	Reason string `json:"reason"`
}
99+
100+
// ServerMetrics represents aggregated metrics for a server
101+
type ServerMetrics struct {
102+
ServerID string `json:"server_id"`
103+
TimeRange TimeRange `json:"time_range"`
104+
TotalCalls int64 `json:"total_calls"`
105+
SuccessRate float64 `json:"success_rate"`
106+
ErrorRate float64 `json:"error_rate"`
107+
AvgLatencyMS float64 `json:"avg_latency_ms"`
108+
P95LatencyMS int64 `json:"p95_latency_ms"`
109+
P99LatencyMS int64 `json:"p99_latency_ms"`
110+
ActiveSessions int `json:"active_sessions"`
111+
UniqueTools int `json:"unique_tools_used"`
112+
Timeline []TimelinePoint `json:"timeline,omitempty"`
113+
}
114+
115+
// TimeRange represents a time range for queries, bounded by From and To.
type TimeRange struct {
	From time.Time `json:"from"`
	To   time.Time `json:"to"`
}
120+
121+
// TimelinePoint represents a single point in a timeline: call and error
// counts plus average latency at Timestamp.
type TimelinePoint struct {
	Timestamp    time.Time `json:"timestamp"`
	Calls        int64     `json:"calls"`
	Errors       int64     `json:"errors"`
	AvgLatencyMS float64   `json:"avg_latency_ms"`
}
128+
129+
// ToolMetrics represents detailed metrics for a specific tool.
//
// ToolName is serialized under the JSON key "name" (not "tool_name").
// LastCalled and LastError are omitted from JSON when nil.
type ToolMetrics struct {
	ToolName      string     `json:"name"`
	CallCount     int64      `json:"call_count"`
	SuccessCount  int64      `json:"success_count"`
	ErrorCount    int64      `json:"error_count"`
	SuccessRate   float64    `json:"success_rate"`
	AvgDurationMS float64    `json:"avg_duration_ms"`
	P50DurationMS int64      `json:"p50_duration_ms"`
	P95DurationMS int64      `json:"p95_duration_ms"`
	P99DurationMS int64      `json:"p99_duration_ms"`
	LastCalled    *time.Time `json:"last_called,omitempty"`
	LastError     *time.Time `json:"last_error,omitempty"`
}
143+
144+
// ErrorInfo represents detailed information about an error.
//
// SessionID and Parameters are omitted from JSON when nil or empty.
type ErrorInfo struct {
	ID           string                 `json:"id"`
	Timestamp    time.Time              `json:"timestamp"`
	ToolName     string                 `json:"tool_name"`
	ErrorMessage string                 `json:"error_message"`
	ErrorType    string                 `json:"error_type"`
	SessionID    *string                `json:"session_id,omitempty"`
	DurationMS   int64                  `json:"duration_ms"`
	Parameters   map[string]interface{} `json:"parameters,omitempty"`
}
155+
156+
// SessionInfo represents detailed information about a session.
//
// EndTime and DurationMS are pointers and are omitted from JSON when nil.
type SessionInfo struct {
	ID                  string     `json:"id"`
	StartTime           time.Time  `json:"start_time"`
	EndTime             *time.Time `json:"end_time,omitempty"`
	DurationMS          *int64     `json:"duration_ms,omitempty"`
	ToolCallCount       int        `json:"tool_call_count"`
	ResourceAccessCount int        `json:"resource_access_count"`
	Status              string     `json:"status"`
	ClientName          string     `json:"client_name"`
	ClientVersion       string     `json:"client_version"`
	ProtocolVersion     string     `json:"protocol_version"`
	LastActivity        time.Time  `json:"last_activity"`
}
170+
171+
// Anomaly represents a detected anomaly, recording the baseline and current
// values that were compared. ToolName is omitted from JSON when empty.
type Anomaly struct {
	Type          string    `json:"type"`
	ToolName      string    `json:"tool_name,omitempty"`
	DetectedAt    time.Time `json:"detected_at"`
	Severity      string    `json:"severity"`
	Description   string    `json:"description"`
	BaselineValue float64   `json:"baseline_value"`
	CurrentValue  float64   `json:"current_value"`
	Confidence    float64   `json:"confidence"`
}
182+
183+
// Pagination represents pagination parameters and metadata: the total number
// of matching items together with the limit and offset that were applied.
type Pagination struct {
	Total  int64 `json:"total"`
	Limit  int   `json:"limit"`
	Offset int   `json:"offset"`
}

internal/version/version.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
package version
2+
3+
// Version is populated at build time by GoReleaser. It keeps the default
// value "dev" when no value is injected at build time.
var Version = "dev"

0 commit comments

Comments
 (0)