Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 28 additions & 36 deletions src/main/java/com/dgu/cap/anomaly/AnomalyDetectionService.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,34 +55,35 @@ private void checkPod(PodInfo pod) {
String namespace = pod.getNamespace();

Double cpu = prometheusService.getCpu(podName);
if (cpu != null && cpu > thresholds.getCpu()) {
handleAnomaly(pod, AnomalyType.CPU_HIGH, cpu, thresholds.getCpu());
}

Double memory = prometheusService.getMemory(podName);
if (memory != null && memory > thresholds.getMemory()) {
handleAnomaly(pod, AnomalyType.MEMORY_HIGH, memory, thresholds.getMemory());
}
Double errorRate = prometheusService.getErrorRate(podName);

if (pod.getRestartCount() >= thresholds.getRestart()) {
handleAnomaly(pod, AnomalyType.POD_RESTART, pod.getRestartCount(), thresholds.getRestart());
}
boolean hasCpuAnomaly = cpu != null && cpu > thresholds.getCpu();
boolean hasMemoryAnomaly = memory != null && memory > thresholds.getMemory();
boolean hasRestartAnomaly = pod.getRestartCount() >= thresholds.getRestart();
boolean hasErrorRateAnomaly = errorRate != null && errorRate > thresholds.getErrorRate();
boolean hasOomKilled = kubernetesService.hasOomKilled(podName, namespace);
boolean hasCrashLoop = kubernetesService.hasCrashLoopBackOff(podName, namespace);

Double errorRate = prometheusService.getErrorRate(podName);
if (errorRate != null && errorRate > thresholds.getErrorRate()) {
handleAnomaly(pod, AnomalyType.ERROR_RATE_HIGH, errorRate, thresholds.getErrorRate());
if (!hasCpuAnomaly && !hasMemoryAnomaly && !hasRestartAnomaly && !hasErrorRateAnomaly && !hasOomKilled && !hasCrashLoop) {
return;
}

if (kubernetesService.hasOomKilled(podName, namespace)) {
handleAnomaly(pod, AnomalyType.OOM_KILLED, 0, 0);
}
List<Double> cpuHistory = toDoubleList(prometheusService.getCpuHistory(podName));
List<Double> memoryHistory = toDoubleList(prometheusService.getMemoryHistory(podName));
List<Double> errorRateHistory = toDoubleList(prometheusService.getErrorRateHistory(podName));

if (kubernetesService.hasCrashLoopBackOff(podName, namespace)) {
handleAnomaly(pod, AnomalyType.CRASH_LOOP, 0, 0);
}
if (hasCpuAnomaly) handleAnomaly(pod, AnomalyType.CPU_HIGH, cpu, thresholds.getCpu(), cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory);
if (hasMemoryAnomaly) handleAnomaly(pod, AnomalyType.MEMORY_HIGH, memory, thresholds.getMemory(), cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory);
if (hasRestartAnomaly) handleAnomaly(pod, AnomalyType.POD_RESTART, pod.getRestartCount(), thresholds.getRestart(), cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory);
if (hasErrorRateAnomaly) handleAnomaly(pod, AnomalyType.ERROR_RATE_HIGH, errorRate, thresholds.getErrorRate(), cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory);
if (hasOomKilled) handleAnomaly(pod, AnomalyType.OOM_KILLED, 0, 0, cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory);
if (hasCrashLoop) handleAnomaly(pod, AnomalyType.CRASH_LOOP, 0, 0, cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory);
}

private void handleAnomaly(PodInfo pod, AnomalyType anomalyType, double metricValue, double threshold) {
private void handleAnomaly(PodInfo pod, AnomalyType anomalyType, double metricValue, double threshold,
Double cpu, Double memory, Double errorRate,
List<Double> cpuHistory, List<Double> memoryHistory, List<Double> errorRateHistory) {
String podName = pod.getPodName();
String namespace = pod.getNamespace();

Expand All @@ -92,14 +93,10 @@ private void handleAnomaly(PodInfo pod, AnomalyType anomalyType, double metricVa

log.info("이상 탐지 - pod: {}, type: {}, value: {}", podName, anomalyType, metricValue);

List<Double> cpuValues = toDoubleList(prometheusService.getCpuHistory(podName));
List<Double> memoryValues = toDoubleList(prometheusService.getMemoryHistory(podName));
List<Double> errorRateValues = toDoubleList(prometheusService.getErrorRateHistory(podName));

MetricsData metrics = MetricsData.builder()
.cpu(cpuValues.isEmpty() ? List.of(0.0) : cpuValues)
.memory(memoryValues.isEmpty() ? List.of(0.0) : memoryValues)
.errorRate(errorRateValues.isEmpty() ? List.of(0.0) : errorRateValues)
.cpu(cpuHistory.isEmpty() ? List.of(0.0) : cpuHistory)
.memory(memoryHistory.isEmpty() ? List.of(0.0) : memoryHistory)
.errorRate(errorRateHistory.isEmpty() ? List.of(0.0) : errorRateHistory)
.build();

PodData podData = PodData.builder()
Expand All @@ -116,11 +113,6 @@ private void handleAnomaly(PodInfo pod, AnomalyType anomalyType, double metricVa

AiResult aiResult = aiService.analyze(podData);

Double currentCpu = prometheusService.getCpu(podName);
Double currentMemory = prometheusService.getMemory(podName);
Integer restarts = prometheusService.getRestarts(podName);
Double currentErrorRate = prometheusService.getErrorRate(podName);

String similarCasesStr = aiResult.getSimilarCases() != null
? String.join("\n", aiResult.getSimilarCases())
: null;
Expand All @@ -136,10 +128,10 @@ private void handleAnomaly(PodInfo pod, AnomalyType anomalyType, double metricVa
.aiAnalysis(aiResult.getAiAnalysis())
.recommendation(aiResult.getRecommendation())
.similarCases(similarCasesStr)
.cpu(currentCpu != null ? BigDecimal.valueOf(currentCpu) : null)
.memory(currentMemory != null ? BigDecimal.valueOf(currentMemory) : null)
.restarts(restarts)
.errorRate(currentErrorRate != null ? BigDecimal.valueOf(currentErrorRate) : null)
.cpu(cpu != null ? BigDecimal.valueOf(cpu) : null)
.memory(memory != null ? BigDecimal.valueOf(memory) : null)
.restarts(pod.getRestartCount())
.errorRate(errorRate != null ? BigDecimal.valueOf(errorRate) : null)
.build();

ticketService.createTicket(request);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@

public interface TicketActionLogRepository extends JpaRepository<TicketActionLog, Long> {

List<TicketActionLog> findByTicketIdOrderByCreatedAtDesc(Long ticketId);
List<TicketActionLog> findByTicket_IdOrderByCreatedAtDesc(Long ticketId);
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@

public interface TicketMetricSnapshotRepository extends JpaRepository<TicketMetricSnapshot, Long> {

Optional<TicketMetricSnapshot> findByTicketId(Long ticketId);
Optional<TicketMetricSnapshot> findByTicket_Id(Long ticketId);
}
6 changes: 3 additions & 3 deletions src/main/java/com/dgu/cap/ticket/TicketService.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public Ticket createTicket(CreateTicketRequest request) {
.build();

ticket = ticketRepository.save(ticket);
ticket.initTicketNumber(String.format("TKT-%d-%03d", Year.now().getValue(), ticket.getId()));
ticket.initTicketNumber(String.format("TKT-%d-%06d", Year.now().getValue(), ticket.getId()));

if (request.getCpu() != null) {
TicketMetricSnapshot snapshot = TicketMetricSnapshot.builder()
Expand Down Expand Up @@ -86,7 +86,7 @@ public Ticket getTicket(Long id) {
}

public TicketMetricSnapshot getMetricSnapshot(Long ticketId) {
return metricSnapshotRepository.findByTicketId(ticketId).orElse(null);
return metricSnapshotRepository.findByTicket_Id(ticketId).orElse(null);
}

@Transactional
Expand All @@ -108,7 +108,7 @@ public Ticket updateStatus(Long id, UpdateStatusRequest request) {
}

public List<TicketActionLog> getActionLogs(Long ticketId) {
return actionLogRepository.findByTicketIdOrderByCreatedAtDesc(ticketId);
return actionLogRepository.findByTicket_IdOrderByCreatedAtDesc(ticketId);
}

}
Loading