From d93497d292c93fe2b86e13a716e5bd5831698f49 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 14 Nov 2025 15:35:50 +0000
Subject: [PATCH 1/3] Initial plan

From e9e1b9ec418242d3b644e941e1fde70cb0e8f903 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 14 Nov 2025 15:38:23 +0000
Subject: [PATCH 2/3] Add server health monitoring script

Co-authored-by: Ashwinmnr <28872446+Ashwinmnr@users.noreply.github.com>
---
 server_health_check.sh | 120 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100755 server_health_check.sh

diff --git a/server_health_check.sh b/server_health_check.sh
new file mode 100755
index 0000000..df98809
--- /dev/null
+++ b/server_health_check.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+# Server Health Monitoring Script
+# Checks disk space, CPU usage, and memory usage
+# Returns "healthy" if all metrics are below 60%, "unhealthy" otherwise
+# (a metric that cannot be read is also reported as unhealthy)
+# Usage: ./server_health_check.sh [explain]
+
+# Parse tool output in the C locale: untranslated labels, '.' decimals
+export LC_ALL=C
+
+readonly THRESHOLD=60
+
+# Function to get disk usage percentage (highest mount point)
+# df -P keeps each filesystem on one line, so column 5 is always the
+# capacity even when the device name is long.
+get_disk_usage() {
+    df -P | awk '
+        NR > 1 && $0 !~ /tmpfs|cdrom|loop/ {
+            sub(/%/, "", $5)
+            if ($5 + 0 > max) max = $5 + 0
+            seen = 1
+        }
+        END { if (seen) printf "%d", max }'
+}
+
+# Function to get CPU usage percentage
+get_cpu_usage() {
+    # Using top to get CPU usage (100 - idle percentage)
+    # Only the last of the two samples is used. The pattern accepts both
+    # "98.0 id" and "98.0%id", with or without a space before the number.
+    top -bn2 -d 1 | awk '
+        /Cpu\(s\)/ { line = $0 }
+        END {
+            if (match(line, /[0-9.]+[ %]*id/)) {
+                usage = 100 - substr(line, RSTART, RLENGTH)
+                printf "%.0f", (usage < 0 ? 0 : usage)
+            }
+        }'
+}
+
+# Function to get memory usage percentage
+get_memory_usage() {
+    free | awk '/^Mem:/ && $2 > 0 { printf "%.0f", ($3 / $2) * 100.0 }'
+}
+
+# Succeeds when $1 is a whole number, i.e. the metric was read correctly
+is_valid_metric() {
+    [[ "$1" =~ ^[0-9]+$ ]]
+}
+
+# Print a metric as "N%", or "unknown" when it could not be read
+format_metric() {
+    if is_valid_metric "$1"; then
+        echo "$1%"
+    else
+        echo "unknown"
+    fi
+}
+
+# Get current metrics
+DISK_USAGE=$(get_disk_usage)
+CPU_USAGE=$(get_cpu_usage)
+MEMORY_USAGE=$(get_memory_usage)
+
+# Check if any metric reaches the threshold
+UNHEALTHY=0
+REASONS=()
+
+# Record a reason when metric $2 (labelled $1) is at or above the
+# threshold. An unreadable metric fails the check instead of slipping
+# through a broken integer comparison.
+check_metric() {
+    local label=$1 value=$2
+    if ! is_valid_metric "$value"; then
+        UNHEALTHY=1
+        REASONS+=("$label usage could not be determined")
+    elif [ "$value" -ge "$THRESHOLD" ]; then
+        UNHEALTHY=1
+        REASONS+=("$label usage is ${value}% (threshold: ${THRESHOLD}%)")
+    fi
+}
+
+check_metric "Disk" "$DISK_USAGE"
+check_metric "CPU" "$CPU_USAGE"
+check_metric "Memory" "$MEMORY_USAGE"
+
+# Determine health status
+if [ "$UNHEALTHY" -eq 1 ]; then
+    STATUS="unhealthy"
+else
+    STATUS="healthy"
+fi
+
+# Output based on argument
+if [[ "${1:-}" == "explain" ]]; then
+    echo "Server Status: $STATUS"
+    echo ""
+    echo "Current Metrics:"
+    echo " - Disk usage: $(format_metric "$DISK_USAGE")"
+    echo " - CPU usage: $(format_metric "$CPU_USAGE")"
+    echo " - Memory usage: $(format_metric "$MEMORY_USAGE")"
+    echo ""
+    echo "Threshold: ${THRESHOLD}%"
+    echo ""
+
+    if [ "$UNHEALTHY" -eq 1 ]; then
+        echo "Reasons for unhealthy status:"
+        for reason in "${REASONS[@]}"; do
+            echo " - $reason"
+        done
+    else
+        echo "All metrics are below the ${THRESHOLD}% threshold."
+    fi
+else
+    echo "$STATUS"
+fi
+
+# Exit with appropriate code
+exit "$UNHEALTHY"

From da152ed2d8df1d5eaf034467c757ca1aff849948 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 14 Nov 2025 15:40:21 +0000
Subject: [PATCH 3/3] Add documentation for server health monitoring script

Co-authored-by: Ashwinmnr <28872446+Ashwinmnr@users.noreply.github.com>
---
 README_SERVER_HEALTH.md | 55 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 README_SERVER_HEALTH.md

diff --git a/README_SERVER_HEALTH.md b/README_SERVER_HEALTH.md
new file mode 100644
index 0000000..3038612
--- /dev/null
+++ b/README_SERVER_HEALTH.md
@@ -0,0 +1,55 @@
+# Server Health Monitoring Script
+
+## Overview
+This script monitors the health of a Linux server by checking disk space, CPU usage, and memory usage against a 60% threshold.
+
+## Usage
+
+### Basic Usage
+```bash
+./server_health_check.sh
+```
+Prints either `healthy` or `unhealthy` based on current system metrics.
+
+### Detailed Explanation
+```bash
+./server_health_check.sh explain
+```
+Prints the health status along with:
+- Current metrics for disk, CPU, and memory usage
+- Threshold value (60%)
+- Detailed reasons if the server is unhealthy
+
+## Health Criteria
+- **Healthy**: All metrics (disk, CPU, memory) are below 60%
+- **Unhealthy**: One or more metrics are at or above 60%
+
+## Exit Codes
+- `0`: Server is healthy
+- `1`: Server is unhealthy
+
+## Example Output
+
+### Without explain argument:
+```
+healthy
+```
+
+### With explain argument:
+```
+Server Status: unhealthy
+
+Current Metrics:
+ - Disk usage: 76%
+ - CPU usage: 15%
+ - Memory usage: 45%
+
+Threshold: 60%
+
+Reasons for unhealthy status:
+ - Disk usage is 76% (threshold: 60%)
+```
+
+## Requirements
+- Linux operating system
+- Standard utilities: `df`, `free`, `top`, `awk`, `grep`, `sed`