Skip to content

Commit bc33282

Browse files
feat(metrics): add server system limits with load average support
- Update gophpeek/system-metrics to v1.4.0 for 21x faster CPU metrics - Add system resource limits to server metrics (CPU cores, memory, load avg) - Implement 5-second cache for CPU metrics to avoid macOS performance issues - Restructure server response to clearly separate worker vs system metrics - Add load average metrics (1min, 5min, 15min) via new SystemMetrics API - Update API usage to handle v1.4.0 breaking changes System limits now include: - CPU: cores, usage_percent, load_average - Memory: total_mb, used_mb, available_mb, usage_percent Performance improvements: - CPU metrics: 2300ms → 105ms on macOS (21x faster via FFI) - Load average: 12x faster via native FFI calls - Static caching prevents repeated expensive syscalls
1 parent 004f443 commit bc33282

File tree

3 files changed

+149
-8
lines changed

3 files changed

+149
-8
lines changed

src/Services/OverviewQueryService.php

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ private function filterJobsForDashboard(array $jobs): array
176176
/**
177177
* Filter server metrics to essential dashboard fields only.
178178
*
179+
* Returns simplified server data with clear separation between:
180+
* - Worker metrics: Job counts, worker utilization (from queue workers)
181+
* - System limits: CPU cores, total memory (physical server resources)
182+
*
179183
* @param array<string, array<string, mixed>> $servers
180184
* @return array<string, array<string, mixed>>
181185
*/
@@ -185,18 +189,32 @@ private function filterServersForDashboard(array $servers): array
185189
$workers = is_array($server['workers'] ?? null) ? $server['workers'] : [];
186190
$utilization = is_array($server['utilization'] ?? null) ? $server['utilization'] : [];
187191
$performance = is_array($server['performance'] ?? null) ? $server['performance'] : [];
192+
$systemLimits = is_array($server['system_limits'] ?? null) ? $server['system_limits'] : null;
188193

189194
$serverUtilization = $utilization['server_utilization'] ?? 0;
190195
$utilizationPercent = is_numeric($serverUtilization) ? round((float) $serverUtilization * 100, 2) : 0;
191196

192-
return [
197+
$result = [
193198
'hostname' => $server['hostname'] ?? '',
194-
'workers_total' => $workers['total'] ?? 0,
195-
'workers_active' => $workers['active'] ?? 0,
196-
'workers_idle' => $workers['idle'] ?? 0,
197-
'utilization_percent' => $utilizationPercent,
198-
'jobs_processed' => $performance['total_jobs_processed'] ?? 0,
199+
// Worker-level metrics (from queue workers)
200+
'workers' => [
201+
'total' => $workers['total'] ?? 0,
202+
'active' => $workers['active'] ?? 0,
203+
'idle' => $workers['idle'] ?? 0,
204+
'utilization_percent' => $utilizationPercent,
205+
],
206+
// Job processing metrics (from queue workers)
207+
'jobs' => [
208+
'processed' => $performance['total_jobs_processed'] ?? 0,
209+
],
199210
];
211+
212+
// System resource limits (physical server capacity)
213+
if ($systemLimits !== null) {
214+
$result['system_limits'] = $systemLimits;
215+
}
216+
217+
return $result;
200218
}, $servers);
201219
}
202220

src/Services/ServerMetricsService.php

Lines changed: 113 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,121 @@
99
/**
1010
* Service for collecting server-wide resource metrics.
1111
*/
12-
final readonly class ServerMetricsService
12+
final class ServerMetricsService
1313
{
14+
/** @var array<string, mixed>|null */
15+
private static ?array $cachedMetrics = null;
16+
17+
private static ?int $cacheTimestamp = null;
18+
19+
private const CACHE_TTL_SECONDS = 5; // Cache CPU metrics for 5 seconds
20+
21+
/**
22+
* Get current server resource limits and usage (with caching for performance).
23+
*
24+
* Returns system limits and current usage with 5-second cache to avoid
25+
* macOS CPU polling performance issues (2+ seconds per call).
26+
*
27+
* Includes:
28+
* - Memory: total, used, available, usage_percent (always fresh, fast)
29+
* - CPU: cores, usage_percent (cached for 5 seconds to avoid slow polling)
30+
*
31+
* @return array<string, mixed>
32+
*/
33+
public function getSystemLimits(): array
34+
{
35+
// Check if cached metrics are still valid (within TTL)
36+
$now = time();
37+
$cacheValid = self::$cachedMetrics !== null
38+
&& self::$cacheTimestamp !== null
39+
&& ($now - self::$cacheTimestamp) < self::CACHE_TTL_SECONDS;
40+
41+
if ($cacheValid && self::$cachedMetrics !== null) {
42+
// Return cached metrics (includes CPU usage from previous call)
43+
return self::$cachedMetrics;
44+
}
45+
46+
// Cache expired or not set - fetch fresh metrics
47+
try {
48+
$result = SystemMetrics::overview();
49+
50+
if (! $result->isSuccess()) {
51+
return [
52+
'available' => false,
53+
'error' => 'Failed to collect system metrics',
54+
];
55+
}
56+
57+
$overview = $result->getValue();
58+
59+
// CPU metrics (now 21x faster on macOS with FFI!)
60+
$cpuCores = $overview->cpu->coreCount();
61+
// Note: v1.4.0 breaking change - usagePercentage() now returns 0-100% directly
62+
$cpuUsagePercent = 0.0;
63+
if ($overview->cpu->total->total() > 0) {
64+
$cpuUsagePercent = ($overview->cpu->total->busy() / $overview->cpu->total->total()) * 100;
65+
}
66+
67+
// Memory metrics (fast, always fresh)
68+
$memoryTotalMb = $overview->memory->totalBytes / (1024 * 1024);
69+
$memoryUsedMb = $overview->memory->usedBytes / (1024 * 1024);
70+
$memoryAvailableMb = $memoryTotalMb - $memoryUsedMb;
71+
$memoryUsagePercent = $overview->memory->usedPercentage();
72+
73+
// Load average (new in v1.4.0 with FFI - 12x faster!)
74+
// Load average is a separate facade method, not part of overview
75+
$loadAverage = [
76+
'1min' => 0.0,
77+
'5min' => 0.0,
78+
'15min' => 0.0,
79+
];
80+
try {
81+
$loadResult = SystemMetrics::loadAverage();
82+
if ($loadResult->isSuccess()) {
83+
$load = $loadResult->getValue();
84+
$loadAverage = [
85+
'1min' => $load->oneMinute,
86+
'5min' => $load->fiveMinutes,
87+
'15min' => $load->fifteenMinutes,
88+
];
89+
}
90+
} catch (\Throwable $e) {
91+
// Load average is optional, continue with zeros if unavailable
92+
}
93+
94+
$metrics = [
95+
'available' => true,
96+
'cpu' => [
97+
'cores' => $cpuCores,
98+
'usage_percent' => round($cpuUsagePercent, 2),
99+
'load_average' => $loadAverage,
100+
],
101+
'memory' => [
102+
'total_mb' => round($memoryTotalMb, 2),
103+
'used_mb' => round($memoryUsedMb, 2),
104+
'available_mb' => round($memoryAvailableMb, 2),
105+
'usage_percent' => round($memoryUsagePercent, 2),
106+
],
107+
];
108+
109+
// Cache the metrics
110+
self::$cachedMetrics = $metrics;
111+
self::$cacheTimestamp = $now;
112+
113+
return $metrics;
114+
} catch (\Throwable $e) {
115+
return [
116+
'available' => false,
117+
'error' => 'Exception collecting system metrics: '.$e->getMessage(),
118+
];
119+
}
120+
}
121+
14122
/**
15-
* Get current server resource metrics.
123+
* Get current server resource metrics (including usage).
124+
*
125+
* WARNING: On macOS, CPU usage calculation can be slow (1-2 seconds).
126+
* Consider using getSystemLimits() for overview/dashboard endpoints.
16127
*
17128
* @return array<string, mixed>
18129
*/

src/Services/WorkerMetricsQueryService.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ public function __construct(
2222
private WorkerHeartbeatRepository $workerHeartbeatRepository,
2323
private JobMetricsRepository $jobMetricsRepository,
2424
private TrendAnalysisService $trendAnalysis,
25+
private ServerMetricsService $serverMetricsService,
2526
) {}
2627

2728
/**
@@ -281,6 +282,17 @@ public function getAllServersWithMetrics(): array
281282
'Consider reducing worker count to optimize resource usage';
282283
}
283284
}
285+
286+
// Add system resource limits (only for current server)
287+
// Note: This gets system limits for the current server running this code
288+
// For multi-server setups, we'd need each server to report its own limits
289+
// Uses fast getSystemLimits() to avoid macOS CPU polling performance issues
290+
if ($hostname === gethostname()) {
291+
$systemLimits = $this->serverMetricsService->getSystemLimits();
292+
if ($systemLimits['available']) {
293+
$server['system_limits'] = $systemLimits;
294+
}
295+
}
284296
}
285297

286298
return $servers;

0 commit comments

Comments
 (0)