diff --git a/src/main/java/com/dgu/cap/anomaly/AnomalyDetectionService.java b/src/main/java/com/dgu/cap/anomaly/AnomalyDetectionService.java index 951b49d..c815588 100644 --- a/src/main/java/com/dgu/cap/anomaly/AnomalyDetectionService.java +++ b/src/main/java/com/dgu/cap/anomaly/AnomalyDetectionService.java @@ -55,34 +55,35 @@ private void checkPod(PodInfo pod) { String namespace = pod.getNamespace(); Double cpu = prometheusService.getCpu(podName); - if (cpu != null && cpu > thresholds.getCpu()) { - handleAnomaly(pod, AnomalyType.CPU_HIGH, cpu, thresholds.getCpu()); - } - Double memory = prometheusService.getMemory(podName); - if (memory != null && memory > thresholds.getMemory()) { - handleAnomaly(pod, AnomalyType.MEMORY_HIGH, memory, thresholds.getMemory()); - } + Double errorRate = prometheusService.getErrorRate(podName); - if (pod.getRestartCount() >= thresholds.getRestart()) { - handleAnomaly(pod, AnomalyType.POD_RESTART, pod.getRestartCount(), thresholds.getRestart()); - } + boolean hasCpuAnomaly = cpu != null && cpu > thresholds.getCpu(); + boolean hasMemoryAnomaly = memory != null && memory > thresholds.getMemory(); + boolean hasRestartAnomaly = pod.getRestartCount() >= thresholds.getRestart(); + boolean hasErrorRateAnomaly = errorRate != null && errorRate > thresholds.getErrorRate(); + boolean hasOomKilled = kubernetesService.hasOomKilled(podName, namespace); + boolean hasCrashLoop = kubernetesService.hasCrashLoopBackOff(podName, namespace); - Double errorRate = prometheusService.getErrorRate(podName); - if (errorRate != null && errorRate > thresholds.getErrorRate()) { - handleAnomaly(pod, AnomalyType.ERROR_RATE_HIGH, errorRate, thresholds.getErrorRate()); + if (!hasCpuAnomaly && !hasMemoryAnomaly && !hasRestartAnomaly && !hasErrorRateAnomaly && !hasOomKilled && !hasCrashLoop) { + return; } - if (kubernetesService.hasOomKilled(podName, namespace)) { - handleAnomaly(pod, AnomalyType.OOM_KILLED, 0, 0); - } + List cpuHistory = toDoubleList(prometheusService.getCpuHistory(podName)); + List memoryHistory = toDoubleList(prometheusService.getMemoryHistory(podName)); + List errorRateHistory = toDoubleList(prometheusService.getErrorRateHistory(podName)); - if (kubernetesService.hasCrashLoopBackOff(podName, namespace)) { - handleAnomaly(pod, AnomalyType.CRASH_LOOP, 0, 0); - } + if (hasCpuAnomaly) handleAnomaly(pod, AnomalyType.CPU_HIGH, cpu, thresholds.getCpu(), cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory); + if (hasMemoryAnomaly) handleAnomaly(pod, AnomalyType.MEMORY_HIGH, memory, thresholds.getMemory(), cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory); + if (hasRestartAnomaly) handleAnomaly(pod, AnomalyType.POD_RESTART, pod.getRestartCount(), thresholds.getRestart(), cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory); + if (hasErrorRateAnomaly) handleAnomaly(pod, AnomalyType.ERROR_RATE_HIGH, errorRate, thresholds.getErrorRate(), cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory); + if (hasOomKilled) handleAnomaly(pod, AnomalyType.OOM_KILLED, 0, 0, cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory); + if (hasCrashLoop) handleAnomaly(pod, AnomalyType.CRASH_LOOP, 0, 0, cpu, memory, errorRate, cpuHistory, memoryHistory, errorRateHistory); } - private void handleAnomaly(PodInfo pod, AnomalyType anomalyType, double metricValue, double threshold) { + private void handleAnomaly(PodInfo pod, AnomalyType anomalyType, double metricValue, double threshold, + Double cpu, Double memory, Double errorRate, + List cpuHistory, List memoryHistory, List errorRateHistory) { String podName = pod.getPodName(); String namespace = pod.getNamespace(); @@ -92,14 +93,10 @@ private void handleAnomaly(PodInfo pod, AnomalyType anomalyType, double metricVa log.info("이상 탐지 - pod: {}, type: {}, value: {}", podName, anomalyType, metricValue); - List cpuValues = toDoubleList(prometheusService.getCpuHistory(podName)); - List memoryValues = toDoubleList(prometheusService.getMemoryHistory(podName)); - List errorRateValues = toDoubleList(prometheusService.getErrorRateHistory(podName)); - MetricsData metrics = MetricsData.builder() - .cpu(cpuValues.isEmpty() ? List.of(0.0) : cpuValues) - .memory(memoryValues.isEmpty() ? List.of(0.0) : memoryValues) - .errorRate(errorRateValues.isEmpty() ? List.of(0.0) : errorRateValues) + .cpu(cpuHistory.isEmpty() ? List.of(0.0) : cpuHistory) + .memory(memoryHistory.isEmpty() ? List.of(0.0) : memoryHistory) + .errorRate(errorRateHistory.isEmpty() ? List.of(0.0) : errorRateHistory) .build(); PodData podData = PodData.builder() @@ -116,11 +113,6 @@ private void handleAnomaly(PodInfo pod, AnomalyType anomalyType, double metricVa AiResult aiResult = aiService.analyze(podData); - Double currentCpu = prometheusService.getCpu(podName); - Double currentMemory = prometheusService.getMemory(podName); - Integer restarts = prometheusService.getRestarts(podName); - Double currentErrorRate = prometheusService.getErrorRate(podName); - String similarCasesStr = aiResult.getSimilarCases() != null ? String.join("\n", aiResult.getSimilarCases()) : null; @@ -136,10 +128,10 @@ private void handleAnomaly(PodInfo pod, AnomalyType anomalyType, double metricVa .aiAnalysis(aiResult.getAiAnalysis()) .recommendation(aiResult.getRecommendation()) .similarCases(similarCasesStr) - .cpu(currentCpu != null ? BigDecimal.valueOf(currentCpu) : null) - .memory(currentMemory != null ? BigDecimal.valueOf(currentMemory) : null) - .restarts(restarts) - .errorRate(currentErrorRate != null ? BigDecimal.valueOf(currentErrorRate) : null) + .cpu(cpu != null ? BigDecimal.valueOf(cpu) : null) + .memory(memory != null ? BigDecimal.valueOf(memory) : null) + .restarts(pod.getRestartCount()) + .errorRate(errorRate != null ? BigDecimal.valueOf(errorRate) : null) .build(); ticketService.createTicket(request); diff --git a/src/main/java/com/dgu/cap/ticket/TicketActionLogRepository.java b/src/main/java/com/dgu/cap/ticket/TicketActionLogRepository.java index 49effb7..43c9830 100644 --- a/src/main/java/com/dgu/cap/ticket/TicketActionLogRepository.java +++ b/src/main/java/com/dgu/cap/ticket/TicketActionLogRepository.java @@ -6,5 +6,5 @@ public interface TicketActionLogRepository extends JpaRepository { - List findByTicketIdOrderByCreatedAtDesc(Long ticketId); + List findByTicket_IdOrderByCreatedAtDesc(Long ticketId); } diff --git a/src/main/java/com/dgu/cap/ticket/TicketMetricSnapshotRepository.java b/src/main/java/com/dgu/cap/ticket/TicketMetricSnapshotRepository.java index e8f308d..661313d 100644 --- a/src/main/java/com/dgu/cap/ticket/TicketMetricSnapshotRepository.java +++ b/src/main/java/com/dgu/cap/ticket/TicketMetricSnapshotRepository.java @@ -6,5 +6,5 @@ public interface TicketMetricSnapshotRepository extends JpaRepository { - Optional findByTicketId(Long ticketId); + Optional findByTicket_Id(Long ticketId); } diff --git a/src/main/java/com/dgu/cap/ticket/TicketService.java b/src/main/java/com/dgu/cap/ticket/TicketService.java index 8d0c34f..de39634 100644 --- a/src/main/java/com/dgu/cap/ticket/TicketService.java +++ b/src/main/java/com/dgu/cap/ticket/TicketService.java @@ -47,7 +47,7 @@ public Ticket createTicket(CreateTicketRequest request) { .build(); ticket = ticketRepository.save(ticket); - ticket.initTicketNumber(String.format("TKT-%d-%03d", Year.now().getValue(), ticket.getId())); + ticket.initTicketNumber(String.format("TKT-%d-%06d", Year.now().getValue(), ticket.getId())); if (request.getCpu() != null) { TicketMetricSnapshot snapshot = TicketMetricSnapshot.builder() @@ -86,7 +86,7 @@ public Ticket getTicket(Long id) { } public TicketMetricSnapshot getMetricSnapshot(Long ticketId) { - return metricSnapshotRepository.findByTicketId(ticketId).orElse(null); + return metricSnapshotRepository.findByTicket_Id(ticketId).orElse(null); } @Transactional @@ -108,7 +108,7 @@ public Ticket updateStatus(Long id, UpdateStatusRequest request) { } public List getActionLogs(Long ticketId) { - return actionLogRepository.findByTicketIdOrderByCreatedAtDesc(ticketId); + return actionLogRepository.findByTicket_IdOrderByCreatedAtDesc(ticketId); } }