From 232e56731f1a0e098aaa76d174ba54af5830a003 Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Tue, 15 Jul 2025 13:15:24 +0200 Subject: [PATCH 01/13] feat: add renku-ci-cleanup chart --- .github/workflows/docker-build.yml | 59 ++++ .github/workflows/helm-chart.yml | 56 ++++ ci-deployment-cleanup/Dockerfile | 52 ++++ ci-deployment-cleanup/README.md | 51 ++++ ci-deployment-cleanup/helm-chart/Chart.yaml | 18 ++ .../helm-chart/templates/_helpers.tpl | 62 ++++ .../helm-chart/templates/configmap.yaml | 270 ++++++++++++++++++ .../helm-chart/templates/cronjob.yaml | 81 ++++++ .../helm-chart/templates/rbac.yaml | 45 +++ .../helm-chart/templates/secret.yaml | 13 + .../helm-chart/templates/serviceaccount.yaml | 12 + ci-deployment-cleanup/helm-chart/values.yaml | 105 +++++++ 12 files changed, 824 insertions(+) create mode 100644 .github/workflows/docker-build.yml create mode 100644 .github/workflows/helm-chart.yml create mode 100644 ci-deployment-cleanup/Dockerfile create mode 100644 ci-deployment-cleanup/README.md create mode 100644 ci-deployment-cleanup/helm-chart/Chart.yaml create mode 100644 ci-deployment-cleanup/helm-chart/templates/_helpers.tpl create mode 100644 ci-deployment-cleanup/helm-chart/templates/configmap.yaml create mode 100644 ci-deployment-cleanup/helm-chart/templates/cronjob.yaml create mode 100644 ci-deployment-cleanup/helm-chart/templates/rbac.yaml create mode 100644 ci-deployment-cleanup/helm-chart/templates/secret.yaml create mode 100644 ci-deployment-cleanup/helm-chart/templates/serviceaccount.yaml create mode 100644 ci-deployment-cleanup/helm-chart/values.yaml diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 0000000..b4391f2 --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,59 @@ +name: Build and Push CI Deployment Cleanup Docker Image + +on: + push: + tags: [ 'ci-deployment-cleanup-v*' ] + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: 
swissdatasciencecenter/renku-ci-cleanup + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Log in to Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract version from tag + id: version + run: | + VERSION=$(echo "${{ github.ref_name }}" | sed 's/ci-deployment-cleanup-v//') + echo "version=$VERSION" >> $GITHUB_OUTPUT + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=raw,value=${{ steps.version.outputs.version }} + type=raw,value=latest + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . 
+ file: ci-deployment-cleanup/Dockerfile + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/helm-chart.yml b/.github/workflows/helm-chart.yml new file mode 100644 index 0000000..86bc77a --- /dev/null +++ b/.github/workflows/helm-chart.yml @@ -0,0 +1,56 @@ +name: Package and Push Helm Chart + +on: + push: + tags: [ 'ci-deployment-cleanup-v*' ] + workflow_dispatch: + +env: + REGISTRY: ghcr.io + +jobs: + helm-chart: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install Helm + uses: azure/setup-helm@v4 + with: + version: '3.14.0' + + - name: Log in to Container Registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Determine app version + id: version + run: | + VERSION=$(echo "${{ github.ref_name }}" | sed 's/ci-deployment-cleanup-v//') + echo "appVersion=$VERSION" >> $GITHUB_OUTPUT + + - name: Update Chart.yaml with app version + run: | + cd ci-deployment-cleanup/helm-chart + sed -i "s/appVersion: .*/appVersion: \"${{ steps.version.outputs.appVersion }}\"/" Chart.yaml + + - name: Lint Helm chart + run: | + cd ci-deployment-cleanup + helm lint helm-chart/ + + - name: Package and push Helm chart + run: | + cd ci-deployment-cleanup + helm package helm-chart/ + helm push *.tgz oci://${{ env.REGISTRY }}/swissdatasciencecenter/helm-charts diff --git a/ci-deployment-cleanup/Dockerfile b/ci-deployment-cleanup/Dockerfile new file mode 100644 index 0000000..23688d0 --- /dev/null +++ b/ci-deployment-cleanup/Dockerfile @@ -0,0 +1,52 @@ +FROM golang:1.24-alpine AS builder + +RUN apk add --no-cache make bash + +WORKDIR /app + +# Copy 
renku-dev-utils files +COPY . . + +# Build the rdu binary +RUN make rdu + +FROM alpine:3.18 + +RUN apk add --no-cache \ + bash \ + curl \ + ca-certificates \ + jq \ + openssl \ + && ARCH=$(case $(uname -m) in x86_64) echo amd64;; aarch64) echo arm64;; *) echo amd64;; esac) \ + && curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl" \ + && chmod +x kubectl \ + && mv kubectl /usr/local/bin/ + +RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \ + && chmod 700 get_helm.sh \ + && ./get_helm.sh \ + && rm get_helm.sh + +# Copy the rdu binary from builder stage +COPY --from=builder /app/build/renku-dev-utils /usr/local/bin/rdu + +# Make rdu executable +RUN chmod +x /usr/local/bin/rdu + +# Create a non-root user +RUN addgroup -g 1000 appuser && \ + adduser -u 1000 -G appuser -s /bin/bash -D appuser + +# Switch to non-root user +USER appuser + +# Set working directory +WORKDIR /home/appuser + +# Verify installations +RUN rdu version || echo "rdu installed" && \ + kubectl version --client && \ + helm version + +CMD ["/bin/bash"] diff --git a/ci-deployment-cleanup/README.md b/ci-deployment-cleanup/README.md new file mode 100644 index 0000000..5a245b0 --- /dev/null +++ b/ci-deployment-cleanup/README.md @@ -0,0 +1,51 @@ +# Renku CI Deployment Cleanup + +A Kubernetes-based CI deployment cleanup system that uses a Helm chart to deploy automated cleanup of old Renku CI deployments. This system runs as a CronJob that leverages the `rdu` tool for comprehensive cleanup. + +## Installation + +Install the Helm chart: +```bash +helm install renku-ci-cleanup ./helm-chart +``` + +## Exemption + +Namespaces can be exempted from cleanup by adding the label `renku.io/cleanup-exempt: "true"` to the namespace. + +## How It Works + +1. The CronJob runs on the specified schedule (default: every 6 hours) +2. It queries Kubernetes for ALL namespaces in the cluster +3. 
For each namespace found: + - Checks if the namespace has the exemption label (if so, skips it) + - Checks if the namespace name matches any of the configured patterns (if enforcement is enabled) + - Calculates the age based on the namespace creation timestamp + - Checks GitHub PR status for PR-based cleanup (if enabled) + - If the namespace is older than the configured threshold AND matches the naming patterns AND is not exempt, it uses `rdu cleanup-deployment` to: + - Delete all sessions + - Uninstall all Helm releases + - Delete all jobs and PVCs + - Delete the entire namespace +4. Logging shows what actions were taken, including exemption and pattern matching results + +## Key Configuration + +The main configuration options in `values.yaml`: + +- `cleanup.maxAge`: Maximum age in hours before cleanup (default: 720 hours / 30 days) +- `cleanup.dryRun`: Enable dry-run mode (default: false) +- `cleanup.namespacePatterns`: List of regex patterns for namespace names +- `cleanup.enforceNamePatterns`: Enable strict pattern matching (default: true) +- `cleanup.prCleanup.enabled`: Enable GitHub PR-based cleanup (default: false) +- `cronJob.schedule`: Cron schedule (default: "0 */6 * * *" - every 6 hours) + +## PR-Based Cleanup + +The system supports GitHub PR-based cleanup that can automatically clean up namespaces when their associated pull requests are closed or merged. This feature requires: + +- `cleanup.prCleanup.enabled: true` +- GitHub API token configured +- Repository mappings in `cleanup.prCleanup.repositories` + +Example configuration maps namespace patterns to GitHub repositories and PR numbers. 
diff --git a/ci-deployment-cleanup/helm-chart/Chart.yaml b/ci-deployment-cleanup/helm-chart/Chart.yaml new file mode 100644 index 0000000..b2930d1 --- /dev/null +++ b/ci-deployment-cleanup/helm-chart/Chart.yaml @@ -0,0 +1,18 @@ +apiVersion: v2 +name: renku-ci-cleanup +description: A Helm chart for cleaning up old Renku CI deployments +type: application +version: 1.0.0 +appVersion: "1.0.0" +keywords: + - renku + - ci + - cleanup + - deployment +home: https://github.com/SwissDataScienceCenter/renku-dev-utils +sources: + - https://github.com/SwissDataScienceCenter/renku-dev-utils +maintainers: + - name: Renku Team + email: hello@renku.io + url: https://renkulab.io diff --git a/ci-deployment-cleanup/helm-chart/templates/_helpers.tpl b/ci-deployment-cleanup/helm-chart/templates/_helpers.tpl new file mode 100644 index 0000000..f949709 --- /dev/null +++ b/ci-deployment-cleanup/helm-chart/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "renku-ci-cleanup.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "renku-ci-cleanup.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "renku-ci-cleanup.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "renku-ci-cleanup.labels" -}} +helm.sh/chart: {{ include "renku-ci-cleanup.chart" . }} +{{ include "renku-ci-cleanup.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "renku-ci-cleanup.selectorLabels" -}} +app.kubernetes.io/name: {{ include "renku-ci-cleanup.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "renku-ci-cleanup.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "renku-ci-cleanup.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/ci-deployment-cleanup/helm-chart/templates/configmap.yaml b/ci-deployment-cleanup/helm-chart/templates/configmap.yaml new file mode 100644 index 0000000..e924b1c --- /dev/null +++ b/ci-deployment-cleanup/helm-chart/templates/configmap.yaml @@ -0,0 +1,270 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "renku-ci-cleanup.fullname" . }}-script + labels: + {{- include "renku-ci-cleanup.labels" . | nindent 4 }} +data: + cleanup.sh: | + #!/bin/bash + set -e + + echo "Starting Renku CI deployment cleanup..." + echo "Max age: {{ .Values.cleanup.maxAge }} hours" + echo "Exemption label: {{ .Values.cleanup.exemptionLabel }}" + {{- if .Values.cleanup.enforceNamePatterns }} + echo "Name pattern enforcement: enabled" + echo "Allowed patterns:" + {{- range .Values.cleanup.namespacePatterns }} + echo " - {{ . 
}}" + {{- end }} + {{- else }} + echo "Name pattern enforcement: disabled" + {{- end }} + + {{- if .Values.cleanup.prCleanup.enabled }} + echo "PR-based cleanup: enabled" + echo "Repository mappings:" + {{- range .Values.cleanup.prCleanup.repositories }} + echo " - {{ .namespacePattern }} -> {{ .repo }}" + {{- end }} + {{- else }} + echo "PR-based cleanup: disabled" + {{- end }} + + {{- if .Values.cleanup.dryRun }} + echo "DRY RUN MODE: No actual deletions will be performed" + {{- end }} + + # Function to calculate age in seconds + calculate_age() { + local timestamp="$1" + local current_time=$(date +%s) + + local creation_time + if command -v gdate >/dev/null 2>&1; then + creation_time=$(gdate -d "$timestamp" +%s 2>/dev/null || echo "0") + else + # Format: 2025-05-28T13:50:39Z + local year month day hour minute second + year=$(echo "$timestamp" | cut -d'-' -f1) + month=$(echo "$timestamp" | cut -d'-' -f2) + day=$(echo "$timestamp" | cut -d'T' -f1 | cut -d'-' -f3) + hour=$(echo "$timestamp" | cut -d'T' -f2 | cut -d':' -f1) + minute=$(echo "$timestamp" | cut -d':' -f2) + second=$(echo "$timestamp" | cut -d':' -f3 | sed 's/Z$//') + + local formatted_timestamp="${year}-${month}-${day} ${hour}:${minute}:${second}" + creation_time=$(date -d "$formatted_timestamp" +%s 2>/dev/null || echo "0") + fi + + if [ "$creation_time" = "0" ]; then + echo "0" + else + echo $((current_time - creation_time)) + fi + } + + # Function to format age for display + format_age() { + local age_seconds="$1" + local age_hours=$((age_seconds / 3600)) + local age_days=$((age_hours / 24)) + + if [ $age_days -gt 0 ]; then + echo "${age_days}d $((age_hours % 24))h" + else + echo "${age_hours}h" + fi + } + + # Function to format hours to days+hours for thresholds + format_hours_threshold() { + local hours="$1" + local days=$((hours / 24)) + + if [ $days -gt 0 ]; then + echo "${days}d ($((hours % 24))h)" + else + echo "${hours}h" + fi + } + + # Function to check if namespace matches any allowed 
pattern + matches_pattern() { + local namespace="$1" + {{- if .Values.cleanup.enforceNamePatterns }} + {{- range .Values.cleanup.namespacePatterns }} + if [[ "$namespace" =~ {{ . }} ]]; then + return 0 + fi + {{- end }} + return 1 + {{- else }} + # Pattern enforcement disabled, always return true + return 0 + {{- end }} + } + + {{- if .Values.cleanup.prCleanup.enabled }} + # Function to check PR status on GitHub + check_pr_status() { + local namespace="$1" + local github_token="${GITHUB_TOKEN}" + + if [ -z "$github_token" ]; then + echo " → GitHub token not configured, skipping PR status check" + return 1 + fi + + # Check each repository mapping + {{- range .Values.cleanup.prCleanup.repositories }} + if [[ "$namespace" =~ {{ .namespacePattern }} ]]; then + local pr_number + {{- if .suffixPattern }} + pr_number=$(echo "$namespace" | sed -E 's/{{ .namespacePattern }}/\1/') + {{- else }} + pr_number="${BASH_REMATCH[1]}" + {{- end }} + + if [ -z "$pr_number" ]; then + echo " → Could not extract PR number from namespace $namespace" + return 1 + fi + + echo " → Checking PR #$pr_number status in {{ .repo }}" + + # Query GitHub API for PR status + local pr_response + pr_response=$(curl -s -H "Authorization: token $github_token" \ + "https://api.github.com/repos/{{ .repo }}/pulls/$pr_number" 2>/dev/null) + + if [ $? 
-ne 0 ]; then + echo " → Failed to query GitHub API for PR #$pr_number" + return 1 + fi + + # Check if PR exists and get its state + local pr_state + pr_state=$(echo "$pr_response" | jq -r '.state // empty' 2>/dev/null) # read only the top-level state; nested objects such as milestone carry their own "state" key + + if [ -z "$pr_state" ]; then + echo " → PR #$pr_number not found in {{ .repo }}" + PR_CLEANUP_REASON="PR #$pr_number not found in {{ .repo }}" + return 0 + fi + + echo " → PR #$pr_number state: $pr_state" + + # Check if PR is closed or merged + if [ "$pr_state" = "closed" ]; then + # For closed PRs, check if it was merged + local merged + merged=$(echo "$pr_response" | jq -r '.merged' 2>/dev/null) # prints true, false or null + if [ "$merged" = "true" ]; then + echo " → PR #$pr_number is merged, eligible for cleanup" + PR_CLEANUP_REASON="PR #$pr_number is merged in {{ .repo }}" + else + echo " → PR #$pr_number is closed but not merged, eligible for cleanup" + PR_CLEANUP_REASON="PR #$pr_number is closed (not merged) in {{ .repo }}" + fi + return 0 + elif [ "$pr_state" = "open" ]; then + echo " → PR #$pr_number is still open, skipping cleanup" + return 1 + else + echo " → PR #$pr_number has unknown state: $pr_state, skipping cleanup" + return 1 + fi + fi + {{- end }} + + echo " → Namespace $namespace does not match any PR cleanup patterns" + return 1 # No matching pattern + } + {{- end }} + + # Get maximum age in seconds + MAX_AGE_SECONDS=$(( {{ .Values.cleanup.maxAge }} * 3600 )) + + # Find and process all namespaces + kubectl get namespaces \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.creationTimestamp}{"\t"}{.metadata.labels}{"\n"}{end}' | \ + while IFS=$'\t' read -r namespace timestamp labels; do + if [ -z "$namespace" ] || [ -z "$timestamp" ]; then + continue + fi + + age_seconds=$(calculate_age "$timestamp") + age_display=$(format_age "$age_seconds") + + echo "Checking namespace: $namespace (age: $age_display)" + + # Check if namespace is exempt 
from cleanup + if [[ "$labels" == *'"{{ .Values.cleanup.exemptionLabel | replace "=" "\":\"" }}"'* ]]; then # kubectl jsonpath prints the label map as JSON, so match the "key":"value" form + echo " → Namespace $namespace is exempt from cleanup (has exemption label), skipping" + continue + fi + + # Check if namespace matches allowed patterns + if ! matches_pattern "$namespace"; then + echo " → Namespace $namespace does not match any allowed patterns, skipping" + continue + fi + + # For matching namespaces, show age comparison with culling threshold + remaining_seconds=$((MAX_AGE_SECONDS - age_seconds)) + remaining_hours=$((remaining_seconds / 3600)) + threshold_display=$(format_hours_threshold {{ .Values.cleanup.maxAge }}) + if [ "$remaining_seconds" -gt 0 ]; then + echo " → Namespace $namespace has ${remaining_hours}h remaining before cleanup (${age_display} < ${threshold_display} threshold)" + else + overdue_hours=$((-remaining_hours)) + echo " → Namespace $namespace is ${overdue_hours}h overdue for cleanup (${age_display} > ${threshold_display} threshold)" + fi + + # Check cleanup conditions + should_cleanup=false + cleanup_reason="" + + {{- if .Values.cleanup.prCleanup.enabled }} + PR_CLEANUP_REASON="" + {{- end }} + + # Check age-based cleanup + if [ "$age_seconds" -gt "$MAX_AGE_SECONDS" ]; then + should_cleanup=true + cleanup_reason="age-based (${age_display} > ${threshold_display})" + fi + + {{- if .Values.cleanup.prCleanup.enabled }} + # Check PR-based cleanup + if check_pr_status "$namespace"; then + should_cleanup=true + if [ -n "$cleanup_reason" ]; then + cleanup_reason="$cleanup_reason and PR-based ($PR_CLEANUP_REASON)" + else + cleanup_reason="PR-based ($PR_CLEANUP_REASON)" + fi + fi + {{- end }} + + if [ "$should_cleanup" = "true" ]; then + echo " → Namespace $namespace eligible for cleanup: $cleanup_reason" + + {{- if .Values.cleanup.dryRun }} + echo " → DRY RUN: Would clean up namespace $namespace ($cleanup_reason)" + {{- else }} + echo " → Using rdu cleanup-deployment for namespace $namespace" + # Create .kube directory and empty 
config file to satisfy rdu's expectations + mkdir -p /home/appuser/.kube + touch /home/appuser/.kube/config + # Unset KUBECONFIG to force rdu to use in-cluster config + unset KUBECONFIG + echo "yes" | rdu cleanup-deployment --namespace "$namespace" --delete-namespace + echo " → Cleanup completed for namespace: $namespace" + {{- end }} + fi + done + + echo "Renku CI deployment cleanup completed" diff --git a/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml b/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml new file mode 100644 index 0000000..26542ed --- /dev/null +++ b/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml @@ -0,0 +1,81 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ include "renku-ci-cleanup.fullname" . }} + labels: + {{- include "renku-ci-cleanup.labels" . | nindent 4 }} +spec: + schedule: {{ .Values.cronJob.schedule | quote }} + concurrencyPolicy: {{ .Values.cronJob.concurrencyPolicy }} + failedJobsHistoryLimit: {{ .Values.cronJob.failedJobsHistoryLimit }} + successfulJobsHistoryLimit: {{ .Values.cronJob.successfulJobsHistoryLimit }} + jobTemplate: + spec: + template: + metadata: + labels: + {{- include "renku-ci-cleanup.selectorLabels" . | nindent 12 }} + spec: + restartPolicy: {{ .Values.cronJob.restartPolicy }} + serviceAccountName: {{ include "renku-ci-cleanup.serviceAccountName" . }} + containers: + - name: cleanup + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - /bin/bash + - /scripts/cleanup.sh + env: + {{- if .Values.cleanup.dryRun }} + - name: DRY_RUN + value: "true" + {{- end }} + {{- if .Values.cleanup.prCleanup.enabled }} + - name: GITHUB_TOKEN + valueFrom: + secretKeyRef: + name: {{ include "renku-ci-cleanup.fullname" . 
}}-github-token + key: token + optional: true + {{- end }} + volumeMounts: + - name: cleanup-script + mountPath: /scripts + readOnly: true + - name: service-account-token + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + readOnly: true + resources: + {{- toYaml .Values.resources | nindent 14 }} + volumes: + - name: cleanup-script + configMap: + name: {{ include "renku-ci-cleanup.fullname" . }}-script + defaultMode: 0755 + - name: service-account-token + projected: + sources: + - serviceAccountToken: + path: token + - configMap: + name: kube-root-ca.crt + items: + - key: ca.crt + path: ca.crt + - downwardAPI: + items: + - path: namespace + fieldRef: + fieldPath: metadata.namespace + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 12 }} + {{- end }} \ No newline at end of file diff --git a/ci-deployment-cleanup/helm-chart/templates/rbac.yaml b/ci-deployment-cleanup/helm-chart/templates/rbac.yaml new file mode 100644 index 0000000..6aa0f67 --- /dev/null +++ b/ci-deployment-cleanup/helm-chart/templates/rbac.yaml @@ -0,0 +1,45 @@ +{{- if .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "renku-ci-cleanup.fullname" . }} + labels: + {{- include "renku-ci-cleanup.labels" . 
| nindent 4 }} +rules: +- apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list", "delete", "watch"] +- apiGroups: [""] + resources: ["pods", "services", "configmaps", "secrets", "persistentvolumeclaims"] + verbs: ["get", "list", "delete", "deletecollection"] +- apiGroups: ["apps"] + resources: ["deployments", "replicasets", "statefulsets"] + verbs: ["get", "list", "delete"] +- apiGroups: ["batch"] + resources: ["jobs", "cronjobs"] + verbs: ["get", "list", "delete", "deletecollection"] +- apiGroups: ["extensions", "networking.k8s.io"] + resources: ["ingresses"] + verbs: ["get", "list", "delete"] +- apiGroups: ["rbac.authorization.k8s.io"] + resources: ["roles", "rolebindings"] + verbs: ["get", "list", "delete"] +- apiGroups: ["amalthea.dev"] + resources: ["amaltheasessions", "jupyterservers"] + verbs: ["get", "list", "delete", "deletecollection", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "renku-ci-cleanup.fullname" . }} + labels: + {{- include "renku-ci-cleanup.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "renku-ci-cleanup.fullname" . }} +subjects: +- kind: ServiceAccount + name: {{ include "renku-ci-cleanup.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} \ No newline at end of file diff --git a/ci-deployment-cleanup/helm-chart/templates/secret.yaml b/ci-deployment-cleanup/helm-chart/templates/secret.yaml new file mode 100644 index 0000000..1143cd4 --- /dev/null +++ b/ci-deployment-cleanup/helm-chart/templates/secret.yaml @@ -0,0 +1,13 @@ +{{- if .Values.cleanup.prCleanup.enabled }} +{{- if .Values.cleanup.prCleanup.githubToken }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "renku-ci-cleanup.fullname" . }}-github-token + labels: + {{- include "renku-ci-cleanup.labels" . 
| nindent 4 }} +type: Opaque +data: + token: {{ .Values.cleanup.prCleanup.githubToken | b64enc }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/ci-deployment-cleanup/helm-chart/templates/serviceaccount.yaml b/ci-deployment-cleanup/helm-chart/templates/serviceaccount.yaml new file mode 100644 index 0000000..1378a1f --- /dev/null +++ b/ci-deployment-cleanup/helm-chart/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "renku-ci-cleanup.serviceAccountName" . }} + labels: + {{- include "renku-ci-cleanup.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/ci-deployment-cleanup/helm-chart/values.yaml b/ci-deployment-cleanup/helm-chart/values.yaml new file mode 100644 index 0000000..d497f5a --- /dev/null +++ b/ci-deployment-cleanup/helm-chart/values.yaml @@ -0,0 +1,105 @@ +# Default values for renku-ci-cleanup +# This is a YAML-formatted file + +# Container image configuration +image: + repository: ghcr.io/swissdatasciencecenter/renku-ci-cleanup + pullPolicy: IfNotPresent + # tag defaults to appVersion from Chart.yaml if not specified + tag: "" + +# CronJob configuration +cronJob: + # Cron schedule (every 6 hours by default) + schedule: "0 */6 * * *" + + # Concurrency policy for the cronjob + concurrencyPolicy: Forbid + + # Number of failed jobs to keep + failedJobsHistoryLimit: 3 + + # Number of successful jobs to keep + successfulJobsHistoryLimit: 1 + + # Restart policy for the job pods + restartPolicy: OnFailure + +# Cleanup configuration +cleanup: + # Maximum age in hours for CI deployments before cleanup + maxAge: 720 + + # Label used to exempt namespaces from cleanup + # Namespaces with this label will be skipped regardless of age + exemptionLabel: "renku.io/cleanup-exempt=true" + + # Namespace name patterns to 
match (regex patterns) + # Only namespaces matching these patterns will be considered for cleanup + namespacePatterns: + - "^ci-renku-.*" + - "^renku-blog-ci-.*" + - "^renku-ci-.*" + + # Enable strict name pattern matching (default: true) + # When true, namespaces must match at least one pattern to be cleaned up + enforceNamePatterns: true + + # Dry run mode - set to true to only log what would be deleted + dryRun: false + + # GitHub PR-based cleanup configuration + # Maps namespace patterns to GitHub repositories for PR status checking + # Format: namespace regex pattern -> {repo: "owner/repo", suffixPattern: "regex"} + prCleanup: + enabled: false + # GitHub API token for accessing PR status (required if prCleanup.enabled is true) + # Should be provided via secret or environment variable + githubToken: "" # Provide via secret or environment variable + # Repository mappings + repositories: + - namespacePattern: "^ci-renku-(.+)$" + repo: "SwissDataScienceCenter/renku" + suffixPattern: "(.+)" + - namespacePattern: "^renku-blog-ci-(.+)$" + repo: "SwissDataScienceCenter/renku-blog" + suffixPattern: "(.+)" + # Example: if namespace is "ci-renku-1234", it maps to PR #1234 in SwissDataScienceCenter/renku + +# Service account configuration +serviceAccount: + # Specifies whether a service account should be created + create: true + + # Annotations to add to the service account + annotations: {} + + # The name of the service account to use + name: "" + +# RBAC configuration +rbac: + # Specifies whether RBAC resources should be created + create: true + +# Resource limits and requests +resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 100m + memory: 128Mi + +# Node selector for pod assignment +nodeSelector: {} + +# Tolerations for pod assignment +tolerations: [] + +# Affinity for pod assignment +affinity: {} + +# Name overrides +nameOverride: "" +fullnameOverride: "" From d73ce1b430ea9f4402d2b622ac1c45c97a5e7714 Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: 
Wed, 30 Jul 2025 11:00:45 +0200 Subject: [PATCH 02/13] chore: remove cleanup script from configmap, add linting --- .github/workflows/docker-build.yml | 4 +- .github/workflows/lint-cleanup-script.yml | 26 ++ ci-deployment-cleanup/helm-chart/cleanup.sh | 365 ++++++++++++++++++ .../helm-chart/templates/configmap.yaml | 264 +------------ .../helm-chart/templates/cronjob.yaml | 26 +- ci-deployment-cleanup/helm-chart/values.yaml | 28 +- 6 files changed, 433 insertions(+), 280 deletions(-) create mode 100644 .github/workflows/lint-cleanup-script.yml create mode 100644 ci-deployment-cleanup/helm-chart/cleanup.sh diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index b4391f2..61375c0 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -40,8 +40,8 @@ jobs: with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | - type=raw,value=${{ steps.version.outputs.version }} - type=raw,value=latest + type=semver,pattern={{version}},value=${{ steps.version.outputs.version }} + type=sha - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 diff --git a/.github/workflows/lint-cleanup-script.yml b/.github/workflows/lint-cleanup-script.yml new file mode 100644 index 0000000..01e4317 --- /dev/null +++ b/.github/workflows/lint-cleanup-script.yml @@ -0,0 +1,26 @@ +name: Lint cleanup.sh script + +on: + push: + paths: + - 'ci-deployment-cleanup/helm-chart/cleanup.sh' + pull_request: + paths: + - 'ci-deployment-cleanup/helm-chart/cleanup.sh' + +permissions: + contents: read + +jobs: + shellcheck: + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Run ShellCheck + uses: ludeeus/action-shellcheck@master + with: + scandir: './ci-deployment-cleanup/helm-chart' + format: gcc + severity: error \ No newline at end of file diff --git a/ci-deployment-cleanup/helm-chart/cleanup.sh b/ci-deployment-cleanup/helm-chart/cleanup.sh new file mode 100644 index 0000000..144edf6 --- /dev/null +++ 
b/ci-deployment-cleanup/helm-chart/cleanup.sh @@ -0,0 +1,365 @@ +#!/bin/bash +set -e + +# Debug logging function +debug_log() { + if [ "$DEBUG_MODE" = "true" ]; then + echo "[DEBUG] $*" >&2 + fi +} + +echo "Starting Renku CI deployment cleanup..." +debug_log "Debug mode is enabled" +debug_log "Environment variables: DRY_RUN=$DRY_RUN, MAX_AGE_HOURS=$MAX_AGE_HOURS" + +echo "Max age: $MAX_AGE_HOURS hours" +echo "Exemption label: $EXEMPTION_LABEL" + +if [ "$ENFORCE_NAME_PATTERNS" = "true" ]; then + echo "Name pattern enforcement: enabled" + echo "Allowed patterns:" + # NAMESPACE_PATTERNS should be a space-separated list + for pattern in $NAMESPACE_PATTERNS; do + echo " - $pattern" + done +else + echo "Name pattern enforcement: disabled" +fi + +if [ "$PR_CLEANUP_ENABLED" = "true" ]; then + echo "PR-based cleanup: enabled" + echo "Repository mappings:" + # PR_REPOSITORIES should be formatted as "pattern1:repo1 pattern2:repo2" + for mapping in $PR_REPOSITORIES; do + pattern=$(echo "$mapping" | cut -d':' -f1) + repo=$(echo "$mapping" | cut -d':' -f2) + echo " - $pattern -> $repo" + done +else + echo "PR-based cleanup: disabled" +fi + +if [ "$DRY_RUN" = "true" ]; then + echo "DRY RUN MODE: No actual deletions will be performed" +fi + +debug_log "Initialization complete, starting namespace discovery" + +# Function to calculate age in seconds +calculate_age() { + local timestamp="$1" + local current_time=$(date +%s) + + debug_log "Calculating age for timestamp: $timestamp" + + # Kubernetes timestamps are in ISO 8601 format, need to handle them properly + local creation_time + if command -v gdate >/dev/null 2>&1; then + # Use GNU date if available (Linux with coreutils) + creation_time=$(gdate -d "$timestamp" +%s 2>/dev/null || echo "0") + debug_log "Used gdate for timestamp parsing" + else + # For Alpine Linux/BusyBox date, we need to parse the ISO 8601 format manually + # Format: 2025-05-28T13:50:39Z + local year month day hour minute second + year=$(echo "$timestamp" | cut 
-d'-' -f1) + month=$(echo "$timestamp" | cut -d'-' -f2) + day=$(echo "$timestamp" | cut -d'T' -f1 | cut -d'-' -f3) + hour=$(echo "$timestamp" | cut -d'T' -f2 | cut -d':' -f1) + minute=$(echo "$timestamp" | cut -d':' -f2) + second=$(echo "$timestamp" | cut -d':' -f3 | sed 's/Z$//') + + debug_log "Parsed timestamp components: $year-$month-$day $hour:$minute:$second" + + # Use BusyBox date with explicit format + local formatted_timestamp="${year}-${month}-${day} ${hour}:${minute}:${second}" + creation_time=$(date -d "$formatted_timestamp" +%s 2>/dev/null || echo "0") + debug_log "Used BusyBox date for timestamp parsing" + fi + + if [ "$creation_time" = "0" ]; then + debug_log "Failed to parse timestamp, returning age 0" + echo "0" + else + local age=$((current_time - creation_time)) + debug_log "Calculated age: $age seconds" + echo "$age" + fi +} + +# Function to format age for display +format_age() { + local age_seconds="$1" + local age_hours=$((age_seconds / 3600)) + local age_days=$((age_hours / 24)) + + if [ $age_days -gt 0 ]; then + echo "${age_days}d $((age_hours % 24))h" + else + echo "${age_hours}h" + fi +} + +# Function to format hours to days+hours for thresholds +format_hours_threshold() { + local hours="$1" + local days=$((hours / 24)) + + if [ $days -gt 0 ]; then + echo "${days}d ($((hours % 24))h)" + else + echo "${hours}h" + fi +} + +# Function to check if namespace matches any allowed pattern +matches_pattern() { + local namespace="$1" + debug_log "Checking if namespace '$namespace' matches any allowed patterns" + + if [ "$ENFORCE_NAME_PATTERNS" = "true" ]; then + for pattern in $NAMESPACE_PATTERNS; do + debug_log "Testing pattern: $pattern" + if [[ "$namespace" =~ $pattern ]]; then + debug_log "Namespace matches pattern: $pattern" + return 0 + fi + done + debug_log "Namespace does not match any patterns" + return 1 + else + # Pattern enforcement disabled, always return true + debug_log "Pattern enforcement disabled, allowing all namespaces" + return 0 
+ fi +} + +# Function to check GitHub PR status +check_pr_status() { + local namespace="$1" + local github_token="${GITHUB_TOKEN}" + + debug_log "Checking PR status for namespace: $namespace" + + if [ -z "$github_token" ]; then + echo " → GitHub token not configured, skipping PR status check" + debug_log "No GitHub token available" + return 1 + fi + + # Check each repository mapping + for mapping in $PR_REPOSITORIES; do + local pattern=$(echo "$mapping" | cut -d':' -f1) + local repo=$(echo "$mapping" | cut -d':' -f2) + + debug_log "Checking mapping: $pattern -> $repo" + + if [[ "$namespace" =~ $pattern ]]; then + debug_log "Namespace matches PR pattern: $pattern" + + # Extract PR number (assuming first capture group) + local pr_number="${BASH_REMATCH[1]}" + + if [ -z "$pr_number" ]; then + echo " → Could not extract PR number from namespace $namespace" + debug_log "Failed to extract PR number" + return 1 + fi + + echo " → Checking PR #$pr_number status in $repo" + debug_log "Querying GitHub API for PR #$pr_number in $repo" + + # Query GitHub API for PR status + local pr_response + pr_response=$(curl -s -H "Authorization: token $github_token" \ + "https://api.github.com/repos/$repo/pulls/$pr_number" 2>/dev/null) + + if [ $? 
-ne 0 ]; then + echo " → Failed to query GitHub API for PR #$pr_number" + debug_log "GitHub API request failed" + return 1 + fi + + debug_log "GitHub API response received" + + # Check if PR exists and get its state + local pr_state + pr_state=$(echo "$pr_response" | grep -o '"state":[[:space:]]*"[^"]*"' | sed 's/"state":[[:space:]]*"\([^"]*\)"/\1/') + + if [ -z "$pr_state" ]; then + echo " → PR #$pr_number not found in $repo" + debug_log "PR not found in repository" + # Set global variable for dry run messaging + PR_CLEANUP_REASON="PR #$pr_number not found in $repo" + return 0 # PR doesn't exist, can clean up + fi + + echo " → PR #$pr_number state: $pr_state" + debug_log "PR state: $pr_state" + + # Check if PR is closed or merged + if [ "$pr_state" = "closed" ]; then + # For closed PRs, check if it was merged + local merged + merged=$(echo "$pr_response" | grep -o '"merged":[[:space:]]*[^,}]*' | sed 's/"merged":[[:space:]]*\([^,}]*\)/\1/') + debug_log "PR merged status: $merged" + + if [ "$merged" = "true" ]; then + echo " → PR #$pr_number is merged, eligible for cleanup" + PR_CLEANUP_REASON="PR #$pr_number is merged in $repo" + else + echo " → PR #$pr_number is closed but not merged, eligible for cleanup" + PR_CLEANUP_REASON="PR #$pr_number is closed (not merged) in $repo" + fi + return 0 # Can clean up + elif [ "$pr_state" = "open" ]; then + echo " → PR #$pr_number is still open, skipping cleanup" + debug_log "PR is still open, cannot clean up" + return 1 # Cannot clean up + else + echo " → PR #$pr_number has unknown state: $pr_state" + debug_log "Unknown PR state: $pr_state" + return 1 # Unknown state, skip cleanup + fi + fi + done + + echo " → Namespace $namespace does not match any PR cleanup patterns" + debug_log "No matching PR cleanup patterns" + return 1 # No matching pattern +} + +# Get maximum age in seconds +MAX_AGE_SECONDS=$(( MAX_AGE_HOURS * 3600 )) +debug_log "Maximum age threshold: $MAX_AGE_SECONDS seconds ($MAX_AGE_HOURS hours)" + +# Find and 
process all namespaces +debug_log "Starting namespace enumeration" +kubectl get namespaces \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.creationTimestamp}{"\t"}{.metadata.labels}{"\n"}{end}' | \ + while IFS=$'\t' read -r namespace timestamp labels; do + if [ -z "$namespace" ] || [ -z "$timestamp" ]; then + debug_log "Skipping empty namespace or timestamp" + continue + fi + + debug_log "Processing namespace: $namespace" + + age_seconds=$(calculate_age "$timestamp") + age_display=$(format_age "$age_seconds") + + echo "Checking namespace: $namespace (age: $age_display)" + + # Check if namespace is exempt from cleanup + if [[ "$labels" == *"$EXEMPTION_LABEL"* ]]; then + echo " → Namespace $namespace is exempt from cleanup (has exemption label), skipping" + debug_log "Namespace has exemption label: $EXEMPTION_LABEL" + continue + fi + + # Check if namespace matches allowed patterns + if ! matches_pattern "$namespace"; then + echo " → Namespace $namespace does not match any allowed patterns, skipping" + continue + fi + + # For matching namespaces, show age comparison with culling threshold + remaining_seconds=$((MAX_AGE_SECONDS - age_seconds)) + remaining_hours=$((remaining_seconds / 3600)) + threshold_display=$(format_hours_threshold $MAX_AGE_HOURS) + + if [ "$remaining_seconds" -gt 0 ]; then + echo " → Namespace $namespace has ${remaining_hours}h remaining before cleanup (${age_display} < ${threshold_display} threshold)" + debug_log "Namespace is within age threshold" + else + overdue_hours=$((-remaining_hours)) + echo " → Namespace $namespace is ${overdue_hours}h overdue for cleanup (${age_display} > ${threshold_display} threshold)" + debug_log "Namespace exceeds age threshold by ${overdue_hours}h" + fi + + # Check cleanup conditions + should_cleanup=false + cleanup_reason="" + + # Initialize PR cleanup reason variable + PR_CLEANUP_REASON="" + + # Check age-based cleanup + if [ "$age_seconds" -gt "$MAX_AGE_SECONDS" ]; then + should_cleanup=true + 
cleanup_reason="age-based (${age_display} > ${threshold_display})" + debug_log "Age-based cleanup triggered" + fi + + # Check PR-based cleanup if enabled + if [ "$PR_CLEANUP_ENABLED" = "true" ]; then + if check_pr_status "$namespace"; then + should_cleanup=true + if [ -n "$cleanup_reason" ]; then + cleanup_reason="$cleanup_reason and PR-based ($PR_CLEANUP_REASON)" + else + cleanup_reason="PR-based ($PR_CLEANUP_REASON)" + fi + debug_log "PR-based cleanup triggered: $PR_CLEANUP_REASON" + fi + fi + + if [ "$should_cleanup" = "true" ]; then + echo " → Namespace $namespace eligible for cleanup: $cleanup_reason" + debug_log "Namespace eligible for cleanup with reason: $cleanup_reason" + + if [ "$DRY_RUN" = "true" ]; then + echo " → DRY RUN: Would clean up namespace $namespace ($cleanup_reason)" + debug_log "Dry run mode: would clean up namespace" + else + debug_log "Performing actual cleanup" + # Use rdu cleanup command with force flag to avoid interactive prompts + if command -v rdu >/dev/null 2>&1; then + echo " → Using rdu cleanup-deployment for namespace $namespace" + debug_log "Using rdu for cleanup" + # Create .kube directory and empty config file to satisfy rdu's expectations + mkdir -p /home/appuser/.kube + touch /home/appuser/.kube/config + # Unset KUBECONFIG to force rdu to use in-cluster config + unset KUBECONFIG + echo "yes" | rdu cleanup-deployment --namespace "$namespace" --delete-namespace || { + echo " → Warning: rdu cleanup failed for $namespace" + debug_log "rdu cleanup failed, checking remaining resources" + + # Check what resources still exist in the namespace + echo " → Checking remaining resources in namespace $namespace:" + if kubectl get all -n "$namespace" 2>/dev/null | grep -v "^NAME" | grep -v "No resources found"; then + kubectl get all -n "$namespace" 2>/dev/null || echo " No standard resources found" + else + echo " No standard resources found" + fi + + # Also check for other common resources + echo " → Checking for PVCs, secrets, and 
configmaps:" + kubectl get pvc,secrets,configmaps -n "$namespace" 2>/dev/null | grep -v "^NAME" | grep -v "No resources found" || echo " No PVCs, secrets, or configmaps found" + + # Check for any finalizers that might be blocking deletion + echo " → Checking namespace finalizers:" + kubectl get namespace "$namespace" -o jsonpath='{.spec.finalizers}' 2>/dev/null | grep -q . && { + echo " Finalizers found: $(kubectl get namespace "$namespace" -o jsonpath='{.spec.finalizers}' 2>/dev/null)" + } || echo " No finalizers found" + + echo " → Attempting manual cleanup" + debug_log "Attempting manual namespace deletion" + kubectl delete namespace "$namespace" --timeout=300s || echo " → Failed to delete namespace $namespace" + } + else + echo " → rdu not available, performing manual cleanup" + debug_log "rdu not available, using kubectl for cleanup" + kubectl delete namespace "$namespace" --timeout=300s || echo " → Failed to delete namespace $namespace" + fi + echo " → Cleanup completed for namespace: $namespace" + debug_log "Cleanup completed for namespace: $namespace" + fi + else + debug_log "Namespace does not meet cleanup criteria" + fi + done + +debug_log "Namespace processing completed" +echo "Renku CI deployment cleanup completed" \ No newline at end of file diff --git a/ci-deployment-cleanup/helm-chart/templates/configmap.yaml b/ci-deployment-cleanup/helm-chart/templates/configmap.yaml index e924b1c..dcc1474 100644 --- a/ci-deployment-cleanup/helm-chart/templates/configmap.yaml +++ b/ci-deployment-cleanup/helm-chart/templates/configmap.yaml @@ -5,266 +5,4 @@ metadata: labels: {{- include "renku-ci-cleanup.labels" . | nindent 4 }} data: - cleanup.sh: | - #!/bin/bash - set -e - - echo "Starting Renku CI deployment cleanup..." 
- echo "Max age: {{ .Values.cleanup.maxAge }} hours" - echo "Exemption label: {{ .Values.cleanup.exemptionLabel }}" - {{- if .Values.cleanup.enforceNamePatterns }} - echo "Name pattern enforcement: enabled" - echo "Allowed patterns:" - {{- range .Values.cleanup.namespacePatterns }} - echo " - {{ . }}" - {{- end }} - {{- else }} - echo "Name pattern enforcement: disabled" - {{- end }} - - {{- if .Values.cleanup.prCleanup.enabled }} - echo "PR-based cleanup: enabled" - echo "Repository mappings:" - {{- range .Values.cleanup.prCleanup.repositories }} - echo " - {{ .namespacePattern }} -> {{ .repo }}" - {{- end }} - {{- else }} - echo "PR-based cleanup: disabled" - {{- end }} - - {{- if .Values.cleanup.dryRun }} - echo "DRY RUN MODE: No actual deletions will be performed" - {{- end }} - - # Function to calculate age in seconds - calculate_age() { - local timestamp="$1" - local current_time=$(date +%s) - - local creation_time - if command -v gdate >/dev/null 2>&1; then - creation_time=$(gdate -d "$timestamp" +%s 2>/dev/null || echo "0") - else - # Format: 2025-05-28T13:50:39Z - local year month day hour minute second - year=$(echo "$timestamp" | cut -d'-' -f1) - month=$(echo "$timestamp" | cut -d'-' -f2) - day=$(echo "$timestamp" | cut -d'T' -f1 | cut -d'-' -f3) - hour=$(echo "$timestamp" | cut -d'T' -f2 | cut -d':' -f1) - minute=$(echo "$timestamp" | cut -d':' -f2) - second=$(echo "$timestamp" | cut -d':' -f3 | sed 's/Z$//') - - local formatted_timestamp="${year}-${month}-${day} ${hour}:${minute}:${second}" - creation_time=$(date -d "$formatted_timestamp" +%s 2>/dev/null || echo "0") - fi - - if [ "$creation_time" = "0" ]; then - echo "0" - else - echo $((current_time - creation_time)) - fi - } - - # Function to format age for display - format_age() { - local age_seconds="$1" - local age_hours=$((age_seconds / 3600)) - local age_days=$((age_hours / 24)) - - if [ $age_days -gt 0 ]; then - echo "${age_days}d $((age_hours % 24))h" - else - echo "${age_hours}h" - fi - } - 
- # Function to format hours to days+hours for thresholds - format_hours_threshold() { - local hours="$1" - local days=$((hours / 24)) - - if [ $days -gt 0 ]; then - echo "${days}d ($((hours % 24))h)" - else - echo "${hours}h" - fi - } - - # Function to check if namespace matches any allowed pattern - matches_pattern() { - local namespace="$1" - {{- if .Values.cleanup.enforceNamePatterns }} - {{- range .Values.cleanup.namespacePatterns }} - if [[ "$namespace" =~ {{ . }} ]]; then - return 0 - fi - {{- end }} - return 1 - {{- else }} - # Pattern enforcement disabled, always return true - return 0 - {{- end }} - } - - {{- if .Values.cleanup.prCleanup.enabled }} - # Function to check PR status on GitHub - check_pr_status() { - local namespace="$1" - local github_token="${GITHUB_TOKEN}" - - if [ -z "$github_token" ]; then - echo " → GitHub token not configured, skipping PR status check" - return 1 - fi - - # Check each repository mapping - {{- range .Values.cleanup.prCleanup.repositories }} - if [[ "$namespace" =~ {{ .namespacePattern }} ]]; then - local pr_number - {{- if .suffixPattern }} - pr_number=$(echo "$namespace" | sed -E 's/{{ .namespacePattern }}/\1/') - {{- else }} - pr_number="${BASH_REMATCH[1]}" - {{- end }} - - if [ -z "$pr_number" ]; then - echo " → Could not extract PR number from namespace $namespace" - return 1 - fi - - echo " → Checking PR #$pr_number status in {{ .repo }}" - - # Query GitHub API for PR status - local pr_response - pr_response=$(curl -s -H "Authorization: token $github_token" \ - "https://api.github.com/repos/{{ .repo }}/pulls/$pr_number" 2>/dev/null) - - if [ $? 
-ne 0 ]; then - echo " → Failed to query GitHub API for PR #$pr_number" - return 1 - fi - - # Check if PR exists and get its state - local pr_state - pr_state=$(echo "$pr_response" | grep -o '"state":[[:space:]]*"[^"]*"' | sed 's/"state":[[:space:]]*"\([^"]*\)"/\1/') - - if [ -z "$pr_state" ]; then - echo " → PR #$pr_number not found in {{ .repo }}" - PR_CLEANUP_REASON="PR #$pr_number not found in {{ .repo }}" - return 0 - fi - - echo " → PR #$pr_number state: $pr_state" - - # Check if PR is closed or merged - if [ "$pr_state" = "closed" ]; then - # For closed PRs, check if it was merged - local merged - merged=$(echo "$pr_response" | grep -o '"merged":[[:space:]]*[^,}]*' | sed 's/"merged":[[:space:]]*\([^,}]*\)/\1/') - if [ "$merged" = "true" ]; then - echo " → PR #$pr_number is merged, eligible for cleanup" - PR_CLEANUP_REASON="PR #$pr_number is merged in {{ .repo }}" - else - echo " → PR #$pr_number is closed but not merged, eligible for cleanup" - PR_CLEANUP_REASON="PR #$pr_number is closed (not merged) in {{ .repo }}" - fi - return 0 - elif [ "$pr_state" = "open" ]; then - echo " → PR #$pr_number is still open, skipping cleanup" - return 1 - else - echo " → PR #$pr_number has unknown state: $pr_state, skipping cleanup" - return 1 - fi - fi - {{- end }} - - echo " → Namespace $namespace does not match any PR cleanup patterns" - return 1 # No matching pattern - } - {{- end }} - - # Get maximum age in seconds - MAX_AGE_SECONDS=$(( {{ .Values.cleanup.maxAge }} * 3600 )) - - # Find and process all namespaces - kubectl get namespaces \ - -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.creationTimestamp}{"\t"}{.metadata.labels}{"\n"}{end}' | \ - while IFS=$'\t' read -r namespace timestamp labels; do - if [ -z "$namespace" ] || [ -z "$timestamp" ]; then - continue - fi - - age_seconds=$(calculate_age "$timestamp") - age_display=$(format_age "$age_seconds") - - echo "Checking namespace: $namespace (age: $age_display)" - - # Check if namespace is exempt 
from cleanup - if [[ "$labels" == *"{{ .Values.cleanup.exemptionLabel | replace "=" "\\=" }}"* ]]; then - echo " → Namespace $namespace is exempt from cleanup (has exemption label), skipping" - continue - fi - - # Check if namespace matches allowed patterns - if ! matches_pattern "$namespace"; then - echo " → Namespace $namespace does not match any allowed patterns, skipping" - continue - fi - - # For matching namespaces, show age comparison with culling threshold - remaining_seconds=$((MAX_AGE_SECONDS - age_seconds)) - remaining_hours=$((remaining_seconds / 3600)) - threshold_display=$(format_hours_threshold {{ .Values.cleanup.maxAge }}) - if [ "$remaining_seconds" -gt 0 ]; then - echo " → Namespace $namespace has ${remaining_hours}h remaining before cleanup (${age_display} < ${threshold_display} threshold)" - else - overdue_hours=$((-remaining_hours)) - echo " → Namespace $namespace is ${overdue_hours}h overdue for cleanup (${age_display} > ${threshold_display} threshold)" - fi - - # Check cleanup conditions - should_cleanup=false - cleanup_reason="" - - {{- if .Values.cleanup.prCleanup.enabled }} - PR_CLEANUP_REASON="" - {{- end }} - - # Check age-based cleanup - if [ "$age_seconds" -gt "$MAX_AGE_SECONDS" ]; then - should_cleanup=true - cleanup_reason="age-based (${age_display} > ${threshold_display})" - fi - - {{- if .Values.cleanup.prCleanup.enabled }} - # Check PR-based cleanup - if check_pr_status "$namespace"; then - should_cleanup=true - if [ -n "$cleanup_reason" ]; then - cleanup_reason="$cleanup_reason and PR-based ($PR_CLEANUP_REASON)" - else - cleanup_reason="PR-based ($PR_CLEANUP_REASON)" - fi - fi - {{- end }} - - if [ "$should_cleanup" = "true" ]; then - echo " → Namespace $namespace eligible for cleanup: $cleanup_reason" - - {{- if .Values.cleanup.dryRun }} - echo " → DRY RUN: Would clean up namespace $namespace ($cleanup_reason)" - {{- else }} - echo " → Using rdu cleanup-deployment for namespace $namespace" - # Create .kube directory and empty 
config file to satisfy rdu's expectations - mkdir -p /home/appuser/.kube - touch /home/appuser/.kube/config - # Unset KUBECONFIG to force rdu to use in-cluster config - unset KUBECONFIG - echo "yes" | rdu cleanup-deployment --namespace "$namespace" --delete-namespace - echo " → Cleanup completed for namespace: $namespace" - {{- end }} - fi - done - - echo "Renku CI deployment cleanup completed" +{{ (.Files.Glob "cleanup.sh").AsConfig | indent 2 }} diff --git a/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml b/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml index 26542ed..9b2c00c 100644 --- a/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml +++ b/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml @@ -26,11 +26,21 @@ spec: - /bin/bash - /scripts/cleanup.sh env: - {{- if .Values.cleanup.dryRun }} - - name: DRY_RUN - value: "true" + - name: MAX_AGE_HOURS + value: {{ .Values.cleanup.maxAge | quote }} + - name: EXEMPTION_LABEL + value: {{ .Values.cleanup.exemptionLabel | quote }} + - name: ENFORCE_NAME_PATTERNS + value: {{ .Values.cleanup.enforceNamePatterns | quote }} + {{- if .Values.cleanup.enforceNamePatterns }} + - name: NAMESPACE_PATTERNS + value: {{ join " " .Values.cleanup.namespacePatterns | quote }} {{- end }} + - name: PR_CLEANUP_ENABLED + value: {{ .Values.cleanup.prCleanup.enabled | quote }} {{- if .Values.cleanup.prCleanup.enabled }} + - name: PR_REPOSITORIES + value: "{{- range $i, $repo := .Values.cleanup.prCleanup.repositories }}{{- if $i }} {{ end }}{{ $repo.namespacePattern }}:{{ $repo.repo }}{{- end }}" - name: GITHUB_TOKEN valueFrom: secretKeyRef: @@ -38,6 +48,16 @@ spec: key: token optional: true {{- end }} + {{- if .Values.cleanup.dryRun }} + - name: DRY_RUN + value: "true" + {{- end }} + {{- if .Values.debug }} + {{- if .Values.debug.enabled }} + - name: DEBUG_MODE + value: "true" + {{- end }} + {{- end }} volumeMounts: - name: cleanup-script mountPath: /scripts diff --git a/ci-deployment-cleanup/helm-chart/values.yaml 
b/ci-deployment-cleanup/helm-chart/values.yaml index d497f5a..d27649b 100644 --- a/ci-deployment-cleanup/helm-chart/values.yaml +++ b/ci-deployment-cleanup/helm-chart/values.yaml @@ -12,16 +12,16 @@ image: cronJob: # Cron schedule (every 6 hours by default) schedule: "0 */6 * * *" - + # Concurrency policy for the cronjob concurrencyPolicy: Forbid - + # Number of failed jobs to keep failedJobsHistoryLimit: 3 - + # Number of successful jobs to keep successfulJobsHistoryLimit: 1 - + # Restart policy for the job pods restartPolicy: OnFailure @@ -29,25 +29,29 @@ cronJob: cleanup: # Maximum age in hours for CI deployments before cleanup maxAge: 720 - + # Label used to exempt namespaces from cleanup # Namespaces with this label will be skipped regardless of age exemptionLabel: "renku.io/cleanup-exempt=true" - + # Namespace name patterns to match (regex patterns) # Only namespaces matching these patterns will be considered for cleanup namespacePatterns: - "^ci-renku-.*" - "^renku-blog-ci-.*" - "^renku-ci-.*" - + # Enable strict name pattern matching (default: true) # When true, namespaces must match at least one pattern to be cleaned up enforceNamePatterns: true - + # Dry run mode - set to true to only log what would be deleted dryRun: false - + + # Debug mode + debug: + enabled: false + # GitHub PR-based cleanup configuration # Maps namespace patterns to GitHub repositories for PR status checking # Format: namespace regex pattern -> {repo: "owner/repo", suffixPattern: "regex"} @@ -55,7 +59,7 @@ cleanup: enabled: false # GitHub API token for accessing PR status (required if prCleanup.enabled is true) # Should be provided via secret or environment variable - githubToken: "" # Provide via secret or environment variable + githubToken: "" # Repository mappings repositories: - namespacePattern: "^ci-renku-(.+)$" @@ -70,10 +74,10 @@ cleanup: serviceAccount: # Specifies whether a service account should be created create: true - + # Annotations to add to the service account 
annotations: {} - + # The name of the service account to use name: "" From 4be970e313ea974ea7df9368dcaad5af03c635e0 Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Wed, 30 Jul 2025 11:45:57 +0200 Subject: [PATCH 03/13] fix: add missing pattern attribute for semver type --- .github/workflows/docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 61375c0..f4aa404 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -40,7 +40,7 @@ jobs: with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | - type=semver,value={{ steps.version.outputs.version }} + type=semver,pattern={{version}},value=${{ steps.version.outputs.version }} type=sha - name: Set up Docker Buildx From 5cf07119801d2ecbf7f63ccd76527f6d252a6bac Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Thu, 11 Sep 2025 09:28:23 +0200 Subject: [PATCH 04/13] fix(ci-deployment-cleanup): support multiple ns labels --- ci-deployment-cleanup/helm-chart/cleanup.sh | 414 ++++++++++---------- 1 file changed, 211 insertions(+), 203 deletions(-) diff --git a/ci-deployment-cleanup/helm-chart/cleanup.sh b/ci-deployment-cleanup/helm-chart/cleanup.sh index 144edf6..f013cec 100644 --- a/ci-deployment-cleanup/helm-chart/cleanup.sh +++ b/ci-deployment-cleanup/helm-chart/cleanup.sh @@ -3,9 +3,9 @@ set -e # Debug logging function debug_log() { - if [ "$DEBUG_MODE" = "true" ]; then - echo "[DEBUG] $*" >&2 - fi + if [ "$DEBUG_MODE" = "true" ]; then + echo "[DEBUG] $*" >&2 + fi } echo "Starting Renku CI deployment cleanup..." 
@@ -16,258 +16,265 @@ echo "Max age: $MAX_AGE_HOURS hours" echo "Exemption label: $EXEMPTION_LABEL" if [ "$ENFORCE_NAME_PATTERNS" = "true" ]; then - echo "Name pattern enforcement: enabled" - echo "Allowed patterns:" - # NAMESPACE_PATTERNS should be a space-separated list - for pattern in $NAMESPACE_PATTERNS; do - echo " - $pattern" - done + echo "Name pattern enforcement: enabled" + echo "Allowed patterns:" + # NAMESPACE_PATTERNS should be a space-separated list + for pattern in $NAMESPACE_PATTERNS; do + echo " - $pattern" + done else - echo "Name pattern enforcement: disabled" + echo "Name pattern enforcement: disabled" fi if [ "$PR_CLEANUP_ENABLED" = "true" ]; then - echo "PR-based cleanup: enabled" - echo "Repository mappings:" - # PR_REPOSITORIES should be formatted as "pattern1:repo1 pattern2:repo2" - for mapping in $PR_REPOSITORIES; do - pattern=$(echo "$mapping" | cut -d':' -f1) - repo=$(echo "$mapping" | cut -d':' -f2) - echo " - $pattern -> $repo" - done + echo "PR-based cleanup: enabled" + echo "Repository mappings:" + # PR_REPOSITORIES should be formatted as "pattern1:repo1 pattern2:repo2" + for mapping in $PR_REPOSITORIES; do + pattern=$(echo "$mapping" | cut -d':' -f1) + repo=$(echo "$mapping" | cut -d':' -f2) + echo " - $pattern -> $repo" + done else - echo "PR-based cleanup: disabled" + echo "PR-based cleanup: disabled" fi if [ "$DRY_RUN" = "true" ]; then - echo "DRY RUN MODE: No actual deletions will be performed" + echo "DRY RUN MODE: No actual deletions will be performed" fi debug_log "Initialization complete, starting namespace discovery" # Function to calculate age in seconds calculate_age() { - local timestamp="$1" - local current_time=$(date +%s) - - debug_log "Calculating age for timestamp: $timestamp" - - # Kubernetes timestamps are in ISO 8601 format, need to handle them properly - local creation_time - if command -v gdate >/dev/null 2>&1; then - # Use GNU date if available (Linux with coreutils) - creation_time=$(gdate -d "$timestamp" +%s 
2>/dev/null || echo "0") - debug_log "Used gdate for timestamp parsing" - else - # For Alpine Linux/BusyBox date, we need to parse the ISO 8601 format manually - # Format: 2025-05-28T13:50:39Z - local year month day hour minute second - year=$(echo "$timestamp" | cut -d'-' -f1) - month=$(echo "$timestamp" | cut -d'-' -f2) - day=$(echo "$timestamp" | cut -d'T' -f1 | cut -d'-' -f3) - hour=$(echo "$timestamp" | cut -d'T' -f2 | cut -d':' -f1) - minute=$(echo "$timestamp" | cut -d':' -f2) - second=$(echo "$timestamp" | cut -d':' -f3 | sed 's/Z$//') - - debug_log "Parsed timestamp components: $year-$month-$day $hour:$minute:$second" - - # Use BusyBox date with explicit format - local formatted_timestamp="${year}-${month}-${day} ${hour}:${minute}:${second}" - creation_time=$(date -d "$formatted_timestamp" +%s 2>/dev/null || echo "0") - debug_log "Used BusyBox date for timestamp parsing" - fi - - if [ "$creation_time" = "0" ]; then - debug_log "Failed to parse timestamp, returning age 0" - echo "0" - else - local age=$((current_time - creation_time)) - debug_log "Calculated age: $age seconds" - echo "$age" - fi + local timestamp="$1" + local current_time=$(date +%s) + + debug_log "Calculating age for timestamp: $timestamp" + + # Kubernetes timestamps are in ISO 8601 format, need to handle them properly + local creation_time + if command -v gdate >/dev/null 2>&1; then + # Use GNU date if available (Linux with coreutils) + creation_time=$(gdate -d "$timestamp" +%s 2>/dev/null || echo "0") + debug_log "Used gdate for timestamp parsing" + else + # For Alpine Linux/BusyBox date, we need to parse the ISO 8601 format manually + # Format: 2025-05-28T13:50:39Z + local year month day hour minute second + year=$(echo "$timestamp" | cut -d'-' -f1) + month=$(echo "$timestamp" | cut -d'-' -f2) + day=$(echo "$timestamp" | cut -d'T' -f1 | cut -d'-' -f3) + hour=$(echo "$timestamp" | cut -d'T' -f2 | cut -d':' -f1) + minute=$(echo "$timestamp" | cut -d':' -f2) + second=$(echo "$timestamp" | 
cut -d':' -f3 | sed 's/Z$//') + + debug_log "Parsed timestamp components: $year-$month-$day $hour:$minute:$second" + + # Use BusyBox date with explicit format + local formatted_timestamp="${year}-${month}-${day} ${hour}:${minute}:${second}" + creation_time=$(date -d "$formatted_timestamp" +%s 2>/dev/null || echo "0") + debug_log "Used BusyBox date for timestamp parsing" + fi + + if [ "$creation_time" = "0" ]; then + debug_log "Failed to parse timestamp, returning age 0" + echo "0" + else + local age=$((current_time - creation_time)) + debug_log "Calculated age: $age seconds" + echo "$age" + fi } # Function to format age for display format_age() { - local age_seconds="$1" - local age_hours=$((age_seconds / 3600)) - local age_days=$((age_hours / 24)) - - if [ $age_days -gt 0 ]; then - echo "${age_days}d $((age_hours % 24))h" - else - echo "${age_hours}h" - fi + local age_seconds="$1" + local age_hours=$((age_seconds / 3600)) + local age_days=$((age_hours / 24)) + + if [ $age_days -gt 0 ]; then + echo "${age_days}d $((age_hours % 24))h" + else + echo "${age_hours}h" + fi } # Function to format hours to days+hours for thresholds format_hours_threshold() { - local hours="$1" - local days=$((hours / 24)) - - if [ $days -gt 0 ]; then - echo "${days}d ($((hours % 24))h)" - else - echo "${hours}h" - fi + local hours="$1" + local days=$((hours / 24)) + + if [ $days -gt 0 ]; then + echo "${days}d ($((hours % 24))h)" + else + echo "${hours}h" + fi } # Function to check if namespace matches any allowed pattern matches_pattern() { - local namespace="$1" - debug_log "Checking if namespace '$namespace' matches any allowed patterns" - - if [ "$ENFORCE_NAME_PATTERNS" = "true" ]; then - for pattern in $NAMESPACE_PATTERNS; do - debug_log "Testing pattern: $pattern" - if [[ "$namespace" =~ $pattern ]]; then - debug_log "Namespace matches pattern: $pattern" - return 0 - fi - done - debug_log "Namespace does not match any patterns" - return 1 - else - # Pattern enforcement disabled, 
always return true - debug_log "Pattern enforcement disabled, allowing all namespaces" + local namespace="$1" + debug_log "Checking if namespace '$namespace' matches any allowed patterns" + + if [ "$ENFORCE_NAME_PATTERNS" = "true" ]; then + for pattern in $NAMESPACE_PATTERNS; do + debug_log "Testing pattern: $pattern" + if [[ "$namespace" =~ $pattern ]]; then + debug_log "Namespace matches pattern: $pattern" return 0 - fi + fi + done + debug_log "Namespace does not match any patterns" + return 1 + else + # Pattern enforcement disabled, always return true + debug_log "Pattern enforcement disabled, allowing all namespaces" + return 0 + fi } # Function to check GitHub PR status check_pr_status() { - local namespace="$1" - local github_token="${GITHUB_TOKEN}" - - debug_log "Checking PR status for namespace: $namespace" - - if [ -z "$github_token" ]; then - echo " → GitHub token not configured, skipping PR status check" - debug_log "No GitHub token available" + local namespace="$1" + local github_token="${GITHUB_TOKEN}" + + debug_log "Checking PR status for namespace: $namespace" + + if [ -z "$github_token" ]; then + echo " → GitHub token not configured, skipping PR status check" + debug_log "No GitHub token available" + return 1 + fi + + # Check each repository mapping + for mapping in $PR_REPOSITORIES; do + local pattern=$(echo "$mapping" | cut -d':' -f1) + local repo=$(echo "$mapping" | cut -d':' -f2) + + debug_log "Checking mapping: $pattern -> $repo" + + if [[ "$namespace" =~ $pattern ]]; then + debug_log "Namespace matches PR pattern: $pattern" + + # Extract PR number (assuming first capture group) + local pr_number="${BASH_REMATCH[1]}" + + if [ -z "$pr_number" ]; then + echo " → Could not extract PR number from namespace $namespace" + debug_log "Failed to extract PR number" return 1 - fi - - # Check each repository mapping - for mapping in $PR_REPOSITORIES; do - local pattern=$(echo "$mapping" | cut -d':' -f1) - local repo=$(echo "$mapping" | cut -d':' -f2) - - 
debug_log "Checking mapping: $pattern -> $repo" - - if [[ "$namespace" =~ $pattern ]]; then - debug_log "Namespace matches PR pattern: $pattern" - - # Extract PR number (assuming first capture group) - local pr_number="${BASH_REMATCH[1]}" - - if [ -z "$pr_number" ]; then - echo " → Could not extract PR number from namespace $namespace" - debug_log "Failed to extract PR number" - return 1 - fi - - echo " → Checking PR #$pr_number status in $repo" - debug_log "Querying GitHub API for PR #$pr_number in $repo" - - # Query GitHub API for PR status - local pr_response - pr_response=$(curl -s -H "Authorization: token $github_token" \ - "https://api.github.com/repos/$repo/pulls/$pr_number" 2>/dev/null) - - if [ $? -ne 0 ]; then - echo " → Failed to query GitHub API for PR #$pr_number" - debug_log "GitHub API request failed" - return 1 - fi - - debug_log "GitHub API response received" - - # Check if PR exists and get its state - local pr_state - pr_state=$(echo "$pr_response" | grep -o '"state":[[:space:]]*"[^"]*"' | sed 's/"state":[[:space:]]*"\([^"]*\)"/\1/') - - if [ -z "$pr_state" ]; then - echo " → PR #$pr_number not found in $repo" - debug_log "PR not found in repository" - # Set global variable for dry run messaging - PR_CLEANUP_REASON="PR #$pr_number not found in $repo" - return 0 # PR doesn't exist, can clean up - fi - - echo " → PR #$pr_number state: $pr_state" - debug_log "PR state: $pr_state" - - # Check if PR is closed or merged - if [ "$pr_state" = "closed" ]; then - # For closed PRs, check if it was merged - local merged - merged=$(echo "$pr_response" | grep -o '"merged":[[:space:]]*[^,}]*' | sed 's/"merged":[[:space:]]*\([^,}]*\)/\1/') - debug_log "PR merged status: $merged" - - if [ "$merged" = "true" ]; then - echo " → PR #$pr_number is merged, eligible for cleanup" - PR_CLEANUP_REASON="PR #$pr_number is merged in $repo" - else - echo " → PR #$pr_number is closed but not merged, eligible for cleanup" - PR_CLEANUP_REASON="PR #$pr_number is closed (not 
merged) in $repo" - fi - return 0 # Can clean up - elif [ "$pr_state" = "open" ]; then - echo " → PR #$pr_number is still open, skipping cleanup" - debug_log "PR is still open, cannot clean up" - return 1 # Cannot clean up - else - echo " → PR #$pr_number has unknown state: $pr_state" - debug_log "Unknown PR state: $pr_state" - return 1 # Unknown state, skip cleanup - fi + fi + + echo " → Checking PR #$pr_number status in $repo" + debug_log "Querying GitHub API for PR #$pr_number in $repo" + + # Query GitHub API for PR status + local pr_response + pr_response=$(curl -s -H "Authorization: token $github_token" \ + "https://api.github.com/repos/$repo/pulls/$pr_number" 2>/dev/null) + + if [ $? -ne 0 ]; then + echo " → Failed to query GitHub API for PR #$pr_number" + debug_log "GitHub API request failed" + return 1 + fi + + debug_log "GitHub API response received" + + # Check if PR exists and get its state + local pr_state + pr_state=$(echo "$pr_response" | grep -o '"state":[[:space:]]*"[^"]*"' | sed 's/"state":[[:space:]]*"\([^"]*\)"/\1/') + + if [ -z "$pr_state" ]; then + echo " → PR #$pr_number not found in $repo" + debug_log "PR not found in repository" + # Set global variable for dry run messaging + PR_CLEANUP_REASON="PR #$pr_number not found in $repo" + return 0 # PR doesn't exist, can clean up + fi + + echo " → PR #$pr_number state: $pr_state" + debug_log "PR state: $pr_state" + + # Check if PR is closed or merged + if [ "$pr_state" = "closed" ]; then + # For closed PRs, check if it was merged + local merged + merged=$(echo "$pr_response" | grep -o '"merged":[[:space:]]*[^,}]*' | sed 's/"merged":[[:space:]]*\([^,}]*\)/\1/') + debug_log "PR merged status: $merged" + + if [ "$merged" = "true" ]; then + echo " → PR #$pr_number is merged, eligible for cleanup" + PR_CLEANUP_REASON="PR #$pr_number is merged in $repo" + else + echo " → PR #$pr_number is closed but not merged, eligible for cleanup" + PR_CLEANUP_REASON="PR #$pr_number is closed (not merged) in $repo" fi - 
done - - echo " → Namespace $namespace does not match any PR cleanup patterns" - debug_log "No matching PR cleanup patterns" - return 1 # No matching pattern + return 0 # Can clean up + elif [ "$pr_state" = "open" ]; then + echo " → PR #$pr_number is still open, skipping cleanup" + debug_log "PR is still open, cannot clean up" + return 1 # Cannot clean up + else + echo " → PR #$pr_number has unknown state: $pr_state" + debug_log "Unknown PR state: $pr_state" + return 1 # Unknown state, skip cleanup + fi + fi + done + + echo " → Namespace $namespace does not match any PR cleanup patterns" + debug_log "No matching PR cleanup patterns" + return 1 # No matching pattern } # Get maximum age in seconds -MAX_AGE_SECONDS=$(( MAX_AGE_HOURS * 3600 )) +MAX_AGE_SECONDS=$((MAX_AGE_HOURS * 3600)) debug_log "Maximum age threshold: $MAX_AGE_SECONDS seconds ($MAX_AGE_HOURS hours)" # Find and process all namespaces debug_log "Starting namespace enumeration" kubectl get namespaces \ - -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.creationTimestamp}{"\t"}{.metadata.labels}{"\n"}{end}' | \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.creationTimestamp}{"\t"}{.metadata.labels}{"\n"}{end}' | while IFS=$'\t' read -r namespace timestamp labels; do if [ -z "$namespace" ] || [ -z "$timestamp" ]; then debug_log "Skipping empty namespace or timestamp" continue fi - + debug_log "Processing namespace: $namespace" - + age_seconds=$(calculate_age "$timestamp") age_display=$(format_age "$age_seconds") - + echo "Checking namespace: $namespace (age: $age_display)" - + # Check if namespace is exempt from cleanup - if [[ "$labels" == *"$EXEMPTION_LABEL"* ]]; then + exemption_key=$(echo "$EXEMPTION_LABEL" | cut -d'=' -f1) + exemption_value=$(echo "$EXEMPTION_LABEL" | cut -d'=' -f2) + + debug_log "Checking exemption: key='$exemption_key', value='$exemption_value'" + debug_log "Namespace labels: $labels" + + # Check if the exemption label key exists with the correct 
value in the JSON labels + if [[ "$labels" == *"\"$exemption_key\":\"$exemption_value\""* ]] || [[ "$labels" == *"\"$exemption_key\":$exemption_value"* ]]; then echo " → Namespace $namespace is exempt from cleanup (has exemption label), skipping" - debug_log "Namespace has exemption label: $EXEMPTION_LABEL" + debug_log "Namespace has exemption label: $exemption_key=$exemption_value" continue fi - + # Check if namespace matches allowed patterns if ! matches_pattern "$namespace"; then echo " → Namespace $namespace does not match any allowed patterns, skipping" continue fi - + # For matching namespaces, show age comparison with culling threshold remaining_seconds=$((MAX_AGE_SECONDS - age_seconds)) remaining_hours=$((remaining_seconds / 3600)) threshold_display=$(format_hours_threshold $MAX_AGE_HOURS) - + if [ "$remaining_seconds" -gt 0 ]; then echo " → Namespace $namespace has ${remaining_hours}h remaining before cleanup (${age_display} < ${threshold_display} threshold)" debug_log "Namespace is within age threshold" @@ -276,21 +283,21 @@ kubectl get namespaces \ echo " → Namespace $namespace is ${overdue_hours}h overdue for cleanup (${age_display} > ${threshold_display} threshold)" debug_log "Namespace exceeds age threshold by ${overdue_hours}h" fi - + # Check cleanup conditions should_cleanup=false cleanup_reason="" - + # Initialize PR cleanup reason variable PR_CLEANUP_REASON="" - + # Check age-based cleanup if [ "$age_seconds" -gt "$MAX_AGE_SECONDS" ]; then should_cleanup=true cleanup_reason="age-based (${age_display} > ${threshold_display})" debug_log "Age-based cleanup triggered" fi - + # Check PR-based cleanup if enabled if [ "$PR_CLEANUP_ENABLED" = "true" ]; then if check_pr_status "$namespace"; then @@ -303,11 +310,11 @@ kubectl get namespaces \ debug_log "PR-based cleanup triggered: $PR_CLEANUP_REASON" fi fi - + if [ "$should_cleanup" = "true" ]; then echo " → Namespace $namespace eligible for cleanup: $cleanup_reason" debug_log "Namespace eligible for 
cleanup with reason: $cleanup_reason" - + if [ "$DRY_RUN" = "true" ]; then echo " → DRY RUN: Would clean up namespace $namespace ($cleanup_reason)" debug_log "Dry run mode: would clean up namespace" @@ -325,7 +332,7 @@ kubectl get namespaces \ echo "yes" | rdu cleanup-deployment --namespace "$namespace" --delete-namespace || { echo " → Warning: rdu cleanup failed for $namespace" debug_log "rdu cleanup failed, checking remaining resources" - + # Check what resources still exist in the namespace echo " → Checking remaining resources in namespace $namespace:" if kubectl get all -n "$namespace" 2>/dev/null | grep -v "^NAME" | grep -v "No resources found"; then @@ -333,17 +340,17 @@ kubectl get namespaces \ else echo " No standard resources found" fi - + # Also check for other common resources echo " → Checking for PVCs, secrets, and configmaps:" kubectl get pvc,secrets,configmaps -n "$namespace" 2>/dev/null | grep -v "^NAME" | grep -v "No resources found" || echo " No PVCs, secrets, or configmaps found" - + # Check for any finalizers that might be blocking deletion echo " → Checking namespace finalizers:" kubectl get namespace "$namespace" -o jsonpath='{.spec.finalizers}' 2>/dev/null | grep -q . 
&& { echo " Finalizers found: $(kubectl get namespace "$namespace" -o jsonpath='{.spec.finalizers}' 2>/dev/null)" } || echo " No finalizers found" - + echo " → Attempting manual cleanup" debug_log "Attempting manual namespace deletion" kubectl delete namespace "$namespace" --timeout=300s || echo " → Failed to delete namespace $namespace" @@ -362,4 +369,5 @@ kubectl get namespaces \ done debug_log "Namespace processing completed" -echo "Renku CI deployment cleanup completed" \ No newline at end of file +echo "Renku CI deployment cleanup completed" + From 16b6d23b381e2c2fb372c4c85eeb3507d797dad7 Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Thu, 11 Sep 2025 10:19:45 +0200 Subject: [PATCH 05/13] fix(ci-deployment-cleanup): prevent pr-based deletion when gh credentials are bad --- ci-deployment-cleanup/helm-chart/cleanup.sh | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/ci-deployment-cleanup/helm-chart/cleanup.sh b/ci-deployment-cleanup/helm-chart/cleanup.sh index f013cec..cea9e8a 100644 --- a/ci-deployment-cleanup/helm-chart/cleanup.sh +++ b/ci-deployment-cleanup/helm-chart/cleanup.sh @@ -182,6 +182,17 @@ check_pr_status() { debug_log "GitHub API response received" + # Check for authentication errors + local auth_error + auth_error=$(echo "$pr_response" | grep -o '"message":[[:space:]]*"[^"]*"' | sed 's/"message":[[:space:]]*"\([^"]*\)"/\1/') + + if [ "$auth_error" = "Bad credentials" ]; then + echo " → GitHub authentication failed (bad credentials), skipping PR cleanup for all namespaces" + debug_log "GitHub authentication failed - bad credentials" + export GITHUB_AUTH_FAILED=true + return 1 + fi + # Check if PR exists and get its state local pr_state pr_state=$(echo "$pr_response" | grep -o '"state":[[:space:]]*"[^"]*"' | sed 's/"state":[[:space:]]*"\([^"]*\)"/\1/') @@ -298,8 +309,8 @@ kubectl get namespaces \ debug_log "Age-based cleanup triggered" fi - # Check PR-based cleanup if enabled - if [ "$PR_CLEANUP_ENABLED" = 
"true" ]; then + # Check PR-based cleanup if enabled and GitHub auth is working + if [ "$PR_CLEANUP_ENABLED" = "true" ] && [ "$GITHUB_AUTH_FAILED" != "true" ]; then if check_pr_status "$namespace"; then should_cleanup=true if [ -n "$cleanup_reason" ]; then @@ -309,6 +320,9 @@ kubectl get namespaces \ fi debug_log "PR-based cleanup triggered: $PR_CLEANUP_REASON" fi + elif [ "$PR_CLEANUP_ENABLED" = "true" ] && [ "$GITHUB_AUTH_FAILED" = "true" ]; then + echo " → Skipping PR-based cleanup due to GitHub authentication failure" + debug_log "Skipping PR cleanup due to GitHub auth failure" fi if [ "$should_cleanup" = "true" ]; then @@ -370,4 +384,3 @@ kubectl get namespaces \ debug_log "Namespace processing completed" echo "Renku CI deployment cleanup completed" - From 8189811161aa50ef38410403b06b32fc10e6a408 Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Thu, 4 Dec 2025 13:47:17 +0100 Subject: [PATCH 06/13] write python script for cleanup --- ci-deployment-cleanup/helm-chart/cleanup.py | 342 ++++++++++++++++ ci-deployment-cleanup/helm-chart/cleanup.sh | 386 ------------------ .../helm-chart/templates/configmap.yaml | 2 +- .../helm-chart/templates/cronjob.yaml | 4 +- ci-deployment-cleanup/helm-chart/values.yaml | 4 +- pkg/cmd/cleanupdeployment.go | 42 +- pkg/cmd/root.go | 1 + pkg/k8s/client.go | 35 +- 8 files changed, 391 insertions(+), 425 deletions(-) create mode 100755 ci-deployment-cleanup/helm-chart/cleanup.py delete mode 100644 ci-deployment-cleanup/helm-chart/cleanup.sh diff --git a/ci-deployment-cleanup/helm-chart/cleanup.py b/ci-deployment-cleanup/helm-chart/cleanup.py new file mode 100755 index 0000000..22ca85c --- /dev/null +++ b/ci-deployment-cleanup/helm-chart/cleanup.py @@ -0,0 +1,342 @@ +import subprocess, shlex +import shutil +import json +import re +import os +from dateutil import parser +from datetime import datetime, timedelta +from github import Github, Auth +from kubernetes import client, config +import logging + +namespace_patterns_str = 
os.environ.get("NAMESPACE_PATTERNS", "") +NAMESPACE_REGEXES = namespace_patterns_str.split() if namespace_patterns_str else [] + +GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "") +MAX_AGE_HOURS = int(os.environ.get("MAX_AGE_HOURS", "720")) +DRY_RUN = os.environ.get("DRY_RUN", "false").lower() == "true" + +exemption_label_str = os.environ.get("EXEMPTION_LABEL", "") +if exemption_label_str and "=" in exemption_label_str: + EXEMPTION_ANNOTATION = exemption_label_str.split("=", 1)[0] +else: + EXEMPTION_ANNOTATION = "renku.io/cleanup-exempt" + + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +console_handler = logging.StreamHandler() +console_handler.setLevel(logging.INFO) +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +console_handler.setFormatter(formatter) +logger.addHandler(console_handler) + +pr_repositories_str = os.environ.get("PR_REPOSITORIES", "") +NAMESPACE_PATTERN_TO_REPO_MAP = {} +if pr_repositories_str: + for mapping in pr_repositories_str.split(): + if ":" in mapping: + pattern, repo = mapping.split(":", 1) + NAMESPACE_PATTERN_TO_REPO_MAP[pattern] = repo + + +class CIDeployment: + def __init__(self, name, namespace, revision, updated, status, chart, app_version): + self.name = name + self.namespace = namespace + self.revision = revision + self.updated = updated + self.status = status + self.chart = chart + self.app_version = app_version + self.repo = None + self.pr_number = None + self.pr_is_open = None + + +class NamespaceChecker: + def __init__(self): + try: + config.load_incluster_config() + except config.ConfigException: + config.load_kube_config() + self.v1 = client.CoreV1Api() + + def is_namespace_exempt(self, namespace_name): + try: + namespace = self.v1.read_namespace(namespace_name) + if namespace.metadata.annotations: + exempt_value = namespace.metadata.annotations.get(EXEMPTION_ANNOTATION) + return exempt_value == "true" + return False + except Exception as e: + logger.error( + 
f"Error checking namespace annotations for {namespace_name}: {e}" + ) + return True + + +class GithubPRChecker: + def __init__(self, github_token): + self.g = Github(auth=Auth.Token(github_token)) + + def is_pr_open(self, repo_name, pr_number): + try: + repo = self.g.get_repo(repo_name) + pr = repo.get_pull(pr_number) + return pr.state == "open" + except Exception as e: + logger.error(f"Error checking PR status for {repo_name}#{pr_number}: {e}") + return True + + +class ShellExecution: + def __init__(self, command): + self.command = command + + def execute(self, dry_run=True): + try: + args = shlex.split(self.command) + path = shutil.which(args[0]) + if path is None: + raise FileNotFoundError(f"Command not found: {self.command.split()[0]}") + else: + args[0] = path + + logger.debug(f"Executing with resolved path: {args}") + + if dry_run: + return "Dry run enabled. No action taken.", "", 0 + + result = subprocess.run( + args, + timeout=900, + encoding="utf-8", + capture_output=True, + check=False, + ) + + return result.stdout, result.stderr, result.returncode + except subprocess.TimeoutExpired: + return "", "Command timed out", -1 + except FileNotFoundError as e: + return "", str(e), -1 + except Exception as e: + return "", str(e), -1 + + +class CIDeploymentsManager: + def __init__(self): + self.deployments = [] + + def get_deployments(self): + command = "helm list --all-namespaces -o json" + shell_exec = ShellExecution(command) + stdout, stderr, returncode = shell_exec.execute(dry_run=False) + + if returncode != 0: + raise RuntimeError( + f"helm command failed with return code {returncode}: {stderr}" + ) + + if not stdout: + raise RuntimeError(f"helm command returned empty output. 
stderr: {stderr}") + + input_dict = json.loads(stdout) + output_set = set() + for ns_regex in NAMESPACE_REGEXES: + output_dict = filter( + lambda ns: re.match(ns_regex, ns["namespace"]), input_dict + ) + for item in output_dict: + last_activity = parser.parse(item["updated"][:19]) + item = CIDeployment( + name=item["name"], + namespace=item["namespace"], + revision=item["revision"], + updated=last_activity, + status=item["status"], + chart=item["chart"], + app_version=item["app_version"], + ) + output_set.add(item) + self.deployments = list(output_set) + + def filter_by_age(self, deployments, hours): + threshold_time = datetime.now() - timedelta(hours=hours) + return [dep for dep in deployments if dep.updated < threshold_time] + + def filter_by_closed_prs(self, deployments): + pr_checker = GithubPRChecker(GITHUB_TOKEN) + filtered = [] + for dep in deployments: + if dep.repo and dep.pr_number: + if not pr_checker.is_pr_open(dep.repo, int(dep.pr_number)): + dep.pr_is_open = False + filtered.append(dep) + else: + dep.pr_is_open = True + else: + filtered.append(dep) + return filtered + + def filter_exempt_namespaces(self, deployments): + ns_checker = NamespaceChecker() + filtered = [] + for dep in deployments: + if ns_checker.is_namespace_exempt(dep.namespace): + logger.info(f"Skipping exempt namespace: {dep.namespace}") + else: + filtered.append(dep) + return filtered + + def get_deletable_deployments(self, max_age_hours): + old = self.filter_by_age(self.deployments, max_age_hours) + closed_pr = self.filter_by_closed_prs(self.deployments) + candidates = list(set(old).union(set(closed_pr))) + return self.filter_exempt_namespaces(candidates) + + def print_deployments(self, deployments): + for dep in deployments: + logger.debug(f"\nName: {dep.name}") + logger.debug(f" Namespace: {dep.namespace}") + logger.debug(f" Updated: {dep.updated}") + logger.debug(f" Repo: {dep.repo}") + logger.debug(f" PR: {dep.pr_number}") + logger.debug(f" PR Open: {dep.pr_is_open}") + + def 
exclude_deployments(self, names_to_exclude): + self.deployments = [ + dep for dep in self.deployments if dep.name not in names_to_exclude + ] + + def match_namespaces_to_repos(self): + for dep in self.deployments: + for pattern, repo in NAMESPACE_PATTERN_TO_REPO_MAP.items(): + if re.match(pattern, dep.namespace): + dep.repo = repo + break + + def assign_pr_numbers(self): + for dep in self.deployments: + potential_pr = dep.namespace.split("-")[-1] + try: + pr_num = int(potential_pr) + dep.pr_number = pr_num + except ValueError: + logger.info( + f"Warning: Could not parse PR number from namespace {dep.namespace}, skipping PR assignment" + ) + dep.pr_number = None + + def run_cleanup(self, max_age_hours=None, dry_run=None): + if max_age_hours is None: + max_age_hours = MAX_AGE_HOURS + if dry_run is None: + dry_run = DRY_RUN + + logger.debug( + f"Starting cleanup with max_age_hours={max_age_hours}, dry_run={dry_run}" + ) + if dry_run: + logger.info("DRY RUN MODE: No actual deletions will be performed") + + logger.debug("Getting CI deployments") + self.get_deployments() + logger.debug(f"Found {len(self.deployments)} CI deployments") + self.match_namespaces_to_repos() + self.assign_pr_numbers() + + logger.debug("Determining deletable CI deployments") + deployments_to_delete = self.get_deletable_deployments(max_age_hours) + + logger.info(f"Total CI deployments to delete: {len(deployments_to_delete)}") + self.print_deployments(deployments=deployments_to_delete) + + successful_deletions = [] + failed_deletions = [] + + for deployment in deployments_to_delete: + remover = CIDeploymentRemover(deployment, dry_run=dry_run) + stdout, stderr, returncode = remover.remove_with_rdu() + + if returncode == 0: + successful_deletions.append(deployment.namespace) + else: + failed_deletions.append((deployment.namespace, returncode, stderr)) + + self.print_summary( + deployments_to_delete, successful_deletions, failed_deletions + ) + + return successful_deletions, failed_deletions + + def 
print_summary(self, all_deployments, successful, failed): + logger.info("=" * 80) + logger.info("CLEANUP SUMMARY") + logger.info("=" * 80) + logger.info(f"Total CI deployments processed: {len(all_deployments)}") + logger.info(f"Successful deletions: {len(successful)}") + logger.info(f"Failed deletions: {len(failed)}") + + if failed: + logger.error("Failed namespaces:") + for namespace, returncode, stderr in failed: + logger.error(f" - {namespace} (exit code: {returncode})") + if stderr: + logger.error(f" Error: {stderr[:200]}") + + +class CIDeploymentRemover: + def __init__(self, deployment, dry_run=True): + self.deployment = deployment + self.dry_run = dry_run + + def remove(self): + self.remove_with_rdu() + + def remove_with_rdu(self): + command = f"rdu cleanup-deployment --namespace {self.deployment.namespace} --delete-namespace --yes" + logger.info( + f"\n{'[DRY RUN] ' if self.dry_run else ''}Deleting namespace: {self.deployment.namespace}" + ) + logger.debug(f" Updated: {self.deployment.updated}") + logger.debug(f" Repo: {self.deployment.repo}") + logger.debug( + f" PR: {self.deployment.pr_number} (Open: {self.deployment.pr_is_open})" + ) + + if self.dry_run: + logger.info(f" Command: {command}") + return "Dry run enabled. 
No action taken.", "", 0 + else: + logger.debug(f" Executing: {command}") + shell_exec = ShellExecution(command) + stdout, stderr, returncode = shell_exec.execute(dry_run=False) + + if returncode == 0: + logger.info( + f" ✓ Successfully deleted namespace: {self.deployment.namespace}" + ) + else: + logger.error( + f" ✗ Failed to delete namespace: {self.deployment.namespace}" + ) + logger.debug(f" Return code: {returncode}") + if stderr: + logger.error(f" Error output: {stderr}") + if stdout: + logger.debug(f" Standard output: {stdout}") + + return stdout, stderr, returncode + + +if __name__ == "__main__": + if not GITHUB_TOKEN: + logger.error("ERROR: GITHUB_TOKEN environment variable is required but not set") + exit(1) + + logger.info(f"Environment: MAX_AGE_HOURS={MAX_AGE_HOURS}, DRY_RUN={DRY_RUN}") + + manager = CIDeploymentsManager() + manager.run_cleanup() diff --git a/ci-deployment-cleanup/helm-chart/cleanup.sh b/ci-deployment-cleanup/helm-chart/cleanup.sh deleted file mode 100644 index cea9e8a..0000000 --- a/ci-deployment-cleanup/helm-chart/cleanup.sh +++ /dev/null @@ -1,386 +0,0 @@ -#!/bin/bash -set -e - -# Debug logging function -debug_log() { - if [ "$DEBUG_MODE" = "true" ]; then - echo "[DEBUG] $*" >&2 - fi -} - -echo "Starting Renku CI deployment cleanup..." 
-debug_log "Debug mode is enabled" -debug_log "Environment variables: DRY_RUN=$DRY_RUN, MAX_AGE_HOURS=$MAX_AGE_HOURS" - -echo "Max age: $MAX_AGE_HOURS hours" -echo "Exemption label: $EXEMPTION_LABEL" - -if [ "$ENFORCE_NAME_PATTERNS" = "true" ]; then - echo "Name pattern enforcement: enabled" - echo "Allowed patterns:" - # NAMESPACE_PATTERNS should be a space-separated list - for pattern in $NAMESPACE_PATTERNS; do - echo " - $pattern" - done -else - echo "Name pattern enforcement: disabled" -fi - -if [ "$PR_CLEANUP_ENABLED" = "true" ]; then - echo "PR-based cleanup: enabled" - echo "Repository mappings:" - # PR_REPOSITORIES should be formatted as "pattern1:repo1 pattern2:repo2" - for mapping in $PR_REPOSITORIES; do - pattern=$(echo "$mapping" | cut -d':' -f1) - repo=$(echo "$mapping" | cut -d':' -f2) - echo " - $pattern -> $repo" - done -else - echo "PR-based cleanup: disabled" -fi - -if [ "$DRY_RUN" = "true" ]; then - echo "DRY RUN MODE: No actual deletions will be performed" -fi - -debug_log "Initialization complete, starting namespace discovery" - -# Function to calculate age in seconds -calculate_age() { - local timestamp="$1" - local current_time=$(date +%s) - - debug_log "Calculating age for timestamp: $timestamp" - - # Kubernetes timestamps are in ISO 8601 format, need to handle them properly - local creation_time - if command -v gdate >/dev/null 2>&1; then - # Use GNU date if available (Linux with coreutils) - creation_time=$(gdate -d "$timestamp" +%s 2>/dev/null || echo "0") - debug_log "Used gdate for timestamp parsing" - else - # For Alpine Linux/BusyBox date, we need to parse the ISO 8601 format manually - # Format: 2025-05-28T13:50:39Z - local year month day hour minute second - year=$(echo "$timestamp" | cut -d'-' -f1) - month=$(echo "$timestamp" | cut -d'-' -f2) - day=$(echo "$timestamp" | cut -d'T' -f1 | cut -d'-' -f3) - hour=$(echo "$timestamp" | cut -d'T' -f2 | cut -d':' -f1) - minute=$(echo "$timestamp" | cut -d':' -f2) - second=$(echo 
"$timestamp" | cut -d':' -f3 | sed 's/Z$//') - - debug_log "Parsed timestamp components: $year-$month-$day $hour:$minute:$second" - - # Use BusyBox date with explicit format - local formatted_timestamp="${year}-${month}-${day} ${hour}:${minute}:${second}" - creation_time=$(date -d "$formatted_timestamp" +%s 2>/dev/null || echo "0") - debug_log "Used BusyBox date for timestamp parsing" - fi - - if [ "$creation_time" = "0" ]; then - debug_log "Failed to parse timestamp, returning age 0" - echo "0" - else - local age=$((current_time - creation_time)) - debug_log "Calculated age: $age seconds" - echo "$age" - fi -} - -# Function to format age for display -format_age() { - local age_seconds="$1" - local age_hours=$((age_seconds / 3600)) - local age_days=$((age_hours / 24)) - - if [ $age_days -gt 0 ]; then - echo "${age_days}d $((age_hours % 24))h" - else - echo "${age_hours}h" - fi -} - -# Function to format hours to days+hours for thresholds -format_hours_threshold() { - local hours="$1" - local days=$((hours / 24)) - - if [ $days -gt 0 ]; then - echo "${days}d ($((hours % 24))h)" - else - echo "${hours}h" - fi -} - -# Function to check if namespace matches any allowed pattern -matches_pattern() { - local namespace="$1" - debug_log "Checking if namespace '$namespace' matches any allowed patterns" - - if [ "$ENFORCE_NAME_PATTERNS" = "true" ]; then - for pattern in $NAMESPACE_PATTERNS; do - debug_log "Testing pattern: $pattern" - if [[ "$namespace" =~ $pattern ]]; then - debug_log "Namespace matches pattern: $pattern" - return 0 - fi - done - debug_log "Namespace does not match any patterns" - return 1 - else - # Pattern enforcement disabled, always return true - debug_log "Pattern enforcement disabled, allowing all namespaces" - return 0 - fi -} - -# Function to check GitHub PR status -check_pr_status() { - local namespace="$1" - local github_token="${GITHUB_TOKEN}" - - debug_log "Checking PR status for namespace: $namespace" - - if [ -z "$github_token" ]; then - echo " 
→ GitHub token not configured, skipping PR status check" - debug_log "No GitHub token available" - return 1 - fi - - # Check each repository mapping - for mapping in $PR_REPOSITORIES; do - local pattern=$(echo "$mapping" | cut -d':' -f1) - local repo=$(echo "$mapping" | cut -d':' -f2) - - debug_log "Checking mapping: $pattern -> $repo" - - if [[ "$namespace" =~ $pattern ]]; then - debug_log "Namespace matches PR pattern: $pattern" - - # Extract PR number (assuming first capture group) - local pr_number="${BASH_REMATCH[1]}" - - if [ -z "$pr_number" ]; then - echo " → Could not extract PR number from namespace $namespace" - debug_log "Failed to extract PR number" - return 1 - fi - - echo " → Checking PR #$pr_number status in $repo" - debug_log "Querying GitHub API for PR #$pr_number in $repo" - - # Query GitHub API for PR status - local pr_response - pr_response=$(curl -s -H "Authorization: token $github_token" \ - "https://api.github.com/repos/$repo/pulls/$pr_number" 2>/dev/null) - - if [ $? 
-ne 0 ]; then - echo " → Failed to query GitHub API for PR #$pr_number" - debug_log "GitHub API request failed" - return 1 - fi - - debug_log "GitHub API response received" - - # Check for authentication errors - local auth_error - auth_error=$(echo "$pr_response" | grep -o '"message":[[:space:]]*"[^"]*"' | sed 's/"message":[[:space:]]*"\([^"]*\)"/\1/') - - if [ "$auth_error" = "Bad credentials" ]; then - echo " → GitHub authentication failed (bad credentials), skipping PR cleanup for all namespaces" - debug_log "GitHub authentication failed - bad credentials" - export GITHUB_AUTH_FAILED=true - return 1 - fi - - # Check if PR exists and get its state - local pr_state - pr_state=$(echo "$pr_response" | grep -o '"state":[[:space:]]*"[^"]*"' | sed 's/"state":[[:space:]]*"\([^"]*\)"/\1/') - - if [ -z "$pr_state" ]; then - echo " → PR #$pr_number not found in $repo" - debug_log "PR not found in repository" - # Set global variable for dry run messaging - PR_CLEANUP_REASON="PR #$pr_number not found in $repo" - return 0 # PR doesn't exist, can clean up - fi - - echo " → PR #$pr_number state: $pr_state" - debug_log "PR state: $pr_state" - - # Check if PR is closed or merged - if [ "$pr_state" = "closed" ]; then - # For closed PRs, check if it was merged - local merged - merged=$(echo "$pr_response" | grep -o '"merged":[[:space:]]*[^,}]*' | sed 's/"merged":[[:space:]]*\([^,}]*\)/\1/') - debug_log "PR merged status: $merged" - - if [ "$merged" = "true" ]; then - echo " → PR #$pr_number is merged, eligible for cleanup" - PR_CLEANUP_REASON="PR #$pr_number is merged in $repo" - else - echo " → PR #$pr_number is closed but not merged, eligible for cleanup" - PR_CLEANUP_REASON="PR #$pr_number is closed (not merged) in $repo" - fi - return 0 # Can clean up - elif [ "$pr_state" = "open" ]; then - echo " → PR #$pr_number is still open, skipping cleanup" - debug_log "PR is still open, cannot clean up" - return 1 # Cannot clean up - else - echo " → PR #$pr_number has unknown state: 
$pr_state" - debug_log "Unknown PR state: $pr_state" - return 1 # Unknown state, skip cleanup - fi - fi - done - - echo " → Namespace $namespace does not match any PR cleanup patterns" - debug_log "No matching PR cleanup patterns" - return 1 # No matching pattern -} - -# Get maximum age in seconds -MAX_AGE_SECONDS=$((MAX_AGE_HOURS * 3600)) -debug_log "Maximum age threshold: $MAX_AGE_SECONDS seconds ($MAX_AGE_HOURS hours)" - -# Find and process all namespaces -debug_log "Starting namespace enumeration" -kubectl get namespaces \ - -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.metadata.creationTimestamp}{"\t"}{.metadata.labels}{"\n"}{end}' | - while IFS=$'\t' read -r namespace timestamp labels; do - if [ -z "$namespace" ] || [ -z "$timestamp" ]; then - debug_log "Skipping empty namespace or timestamp" - continue - fi - - debug_log "Processing namespace: $namespace" - - age_seconds=$(calculate_age "$timestamp") - age_display=$(format_age "$age_seconds") - - echo "Checking namespace: $namespace (age: $age_display)" - - # Check if namespace is exempt from cleanup - exemption_key=$(echo "$EXEMPTION_LABEL" | cut -d'=' -f1) - exemption_value=$(echo "$EXEMPTION_LABEL" | cut -d'=' -f2) - - debug_log "Checking exemption: key='$exemption_key', value='$exemption_value'" - debug_log "Namespace labels: $labels" - - # Check if the exemption label key exists with the correct value in the JSON labels - if [[ "$labels" == *"\"$exemption_key\":\"$exemption_value\""* ]] || [[ "$labels" == *"\"$exemption_key\":$exemption_value"* ]]; then - echo " → Namespace $namespace is exempt from cleanup (has exemption label), skipping" - debug_log "Namespace has exemption label: $exemption_key=$exemption_value" - continue - fi - - # Check if namespace matches allowed patterns - if ! 
matches_pattern "$namespace"; then - echo " → Namespace $namespace does not match any allowed patterns, skipping" - continue - fi - - # For matching namespaces, show age comparison with culling threshold - remaining_seconds=$((MAX_AGE_SECONDS - age_seconds)) - remaining_hours=$((remaining_seconds / 3600)) - threshold_display=$(format_hours_threshold $MAX_AGE_HOURS) - - if [ "$remaining_seconds" -gt 0 ]; then - echo " → Namespace $namespace has ${remaining_hours}h remaining before cleanup (${age_display} < ${threshold_display} threshold)" - debug_log "Namespace is within age threshold" - else - overdue_hours=$((-remaining_hours)) - echo " → Namespace $namespace is ${overdue_hours}h overdue for cleanup (${age_display} > ${threshold_display} threshold)" - debug_log "Namespace exceeds age threshold by ${overdue_hours}h" - fi - - # Check cleanup conditions - should_cleanup=false - cleanup_reason="" - - # Initialize PR cleanup reason variable - PR_CLEANUP_REASON="" - - # Check age-based cleanup - if [ "$age_seconds" -gt "$MAX_AGE_SECONDS" ]; then - should_cleanup=true - cleanup_reason="age-based (${age_display} > ${threshold_display})" - debug_log "Age-based cleanup triggered" - fi - - # Check PR-based cleanup if enabled and GitHub auth is working - if [ "$PR_CLEANUP_ENABLED" = "true" ] && [ "$GITHUB_AUTH_FAILED" != "true" ]; then - if check_pr_status "$namespace"; then - should_cleanup=true - if [ -n "$cleanup_reason" ]; then - cleanup_reason="$cleanup_reason and PR-based ($PR_CLEANUP_REASON)" - else - cleanup_reason="PR-based ($PR_CLEANUP_REASON)" - fi - debug_log "PR-based cleanup triggered: $PR_CLEANUP_REASON" - fi - elif [ "$PR_CLEANUP_ENABLED" = "true" ] && [ "$GITHUB_AUTH_FAILED" = "true" ]; then - echo " → Skipping PR-based cleanup due to GitHub authentication failure" - debug_log "Skipping PR cleanup due to GitHub auth failure" - fi - - if [ "$should_cleanup" = "true" ]; then - echo " → Namespace $namespace eligible for cleanup: $cleanup_reason" - debug_log 
"Namespace eligible for cleanup with reason: $cleanup_reason" - - if [ "$DRY_RUN" = "true" ]; then - echo " → DRY RUN: Would clean up namespace $namespace ($cleanup_reason)" - debug_log "Dry run mode: would clean up namespace" - else - debug_log "Performing actual cleanup" - # Use rdu cleanup command with force flag to avoid interactive prompts - if command -v rdu >/dev/null 2>&1; then - echo " → Using rdu cleanup-deployment for namespace $namespace" - debug_log "Using rdu for cleanup" - # Create .kube directory and empty config file to satisfy rdu's expectations - mkdir -p /home/appuser/.kube - touch /home/appuser/.kube/config - # Unset KUBECONFIG to force rdu to use in-cluster config - unset KUBECONFIG - echo "yes" | rdu cleanup-deployment --namespace "$namespace" --delete-namespace || { - echo " → Warning: rdu cleanup failed for $namespace" - debug_log "rdu cleanup failed, checking remaining resources" - - # Check what resources still exist in the namespace - echo " → Checking remaining resources in namespace $namespace:" - if kubectl get all -n "$namespace" 2>/dev/null | grep -v "^NAME" | grep -v "No resources found"; then - kubectl get all -n "$namespace" 2>/dev/null || echo " No standard resources found" - else - echo " No standard resources found" - fi - - # Also check for other common resources - echo " → Checking for PVCs, secrets, and configmaps:" - kubectl get pvc,secrets,configmaps -n "$namespace" 2>/dev/null | grep -v "^NAME" | grep -v "No resources found" || echo " No PVCs, secrets, or configmaps found" - - # Check for any finalizers that might be blocking deletion - echo " → Checking namespace finalizers:" - kubectl get namespace "$namespace" -o jsonpath='{.spec.finalizers}' 2>/dev/null | grep -q . 
&& { - echo " Finalizers found: $(kubectl get namespace "$namespace" -o jsonpath='{.spec.finalizers}' 2>/dev/null)" - } || echo " No finalizers found" - - echo " → Attempting manual cleanup" - debug_log "Attempting manual namespace deletion" - kubectl delete namespace "$namespace" --timeout=300s || echo " → Failed to delete namespace $namespace" - } - else - echo " → rdu not available, performing manual cleanup" - debug_log "rdu not available, using kubectl for cleanup" - kubectl delete namespace "$namespace" --timeout=300s || echo " → Failed to delete namespace $namespace" - fi - echo " → Cleanup completed for namespace: $namespace" - debug_log "Cleanup completed for namespace: $namespace" - fi - else - debug_log "Namespace does not meet cleanup criteria" - fi - done - -debug_log "Namespace processing completed" -echo "Renku CI deployment cleanup completed" diff --git a/ci-deployment-cleanup/helm-chart/templates/configmap.yaml b/ci-deployment-cleanup/helm-chart/templates/configmap.yaml index dcc1474..4b44677 100644 --- a/ci-deployment-cleanup/helm-chart/templates/configmap.yaml +++ b/ci-deployment-cleanup/helm-chart/templates/configmap.yaml @@ -5,4 +5,4 @@ metadata: labels: {{- include "renku-ci-cleanup.labels" . 
| nindent 4 }} data: -{{ (.Files.Glob "cleanup.sh").AsConfig | indent 2 }} +{{ (.Files.Glob "cleanup.py").AsConfig | indent 2 }} diff --git a/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml b/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml index 9b2c00c..2ecd2a8 100644 --- a/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml +++ b/ci-deployment-cleanup/helm-chart/templates/cronjob.yaml @@ -23,8 +23,8 @@ spec: image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} command: - - /bin/bash - - /scripts/cleanup.sh + - python3 + - /scripts/cleanup.py env: - name: MAX_AGE_HOURS value: {{ .Values.cleanup.maxAge | quote }} diff --git a/ci-deployment-cleanup/helm-chart/values.yaml b/ci-deployment-cleanup/helm-chart/values.yaml index d27649b..8112f24 100644 --- a/ci-deployment-cleanup/helm-chart/values.yaml +++ b/ci-deployment-cleanup/helm-chart/values.yaml @@ -17,10 +17,10 @@ cronJob: concurrencyPolicy: Forbid # Number of failed jobs to keep - failedJobsHistoryLimit: 3 + failedJobsHistoryLimit: 5 # Number of successful jobs to keep - successfulJobsHistoryLimit: 1 + successfulJobsHistoryLimit: 3 # Restart policy for the job pods restartPolicy: OnFailure diff --git a/pkg/cmd/cleanupdeployment.go b/pkg/cmd/cleanupdeployment.go index 584a170..f60eecf 100644 --- a/pkg/cmd/cleanupdeployment.go +++ b/pkg/cmd/cleanupdeployment.go @@ -13,6 +13,7 @@ import ( ns "github.com/SwissDataScienceCenter/renku-dev-utils/pkg/namespace" "github.com/spf13/cobra" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/api/errors" ) var cleanupDeploymentCmd = &cobra.Command{ @@ -49,29 +50,31 @@ func cleanupDeployment(cmd *cobra.Command, args []string) { os.Exit(1) } - // Ask for confirmation - fmt.Printf("This command will perform the following actions in the namespace '%s':\n", namespace) - fmt.Println(" 1. Delete all sessions") - fmt.Println(" 2. 
Uninstall all helm releases") - fmt.Println(" 3. Delete all jobs") - fmt.Println(" 4. Delete all PVCs") - fmt.Println(" 5. Forcibly delete all sessions") - if deleteNamespace { - fmt.Printf(" 6. Delete the namespace '%s'\n", namespace) - } - proceed, err := askForConfirmation("Proceed?") - if err != nil { - fmt.Println(err) - os.Exit(1) - } - if !proceed { - os.Exit(0) + // Ask for confirmation unless --yes flag is set + if !yes { + fmt.Printf("This command will perform the following actions in the namespace '%s':\n", namespace) + fmt.Println(" 1. Delete all sessions") + fmt.Println(" 2. Uninstall all helm releases") + fmt.Println(" 3. Delete all jobs") + fmt.Println(" 4. Delete all PVCs") + fmt.Println(" 5. Forcibly delete all sessions") + if deleteNamespace { + fmt.Printf(" 6. Delete the namespace '%s'\n", namespace) + } + proceed, err := askForConfirmation("Proceed?") + if err != nil { + fmt.Println(err) + os.Exit(1) + } + if !proceed { + os.Exit(0) + } } // 1. Delete all sessions fmt.Println("1. Delete all sessions") err = k8s.DeleteAllSessions(ctx, client, namespace, k8s.DeleteAllSessionsOptions{}) - if err != nil { + if err != nil && !errors.IsNotFound(err) { fmt.Println(err) os.Exit(1) } @@ -109,7 +112,7 @@ func cleanupDeployment(cmd *cobra.Command, args []string) { // 5. Forcibly delete all sessions fmt.Println("5. 
Forcibly delete all sessions") err = k8s.ForciblyDeleteAllSessions(ctx, client, namespace, k8s.DeleteAllSessionsOptions{}) - if err != nil { + if err != nil && !errors.IsNotFound(err) { fmt.Println(err) os.Exit(1) } @@ -128,6 +131,7 @@ func cleanupDeployment(cmd *cobra.Command, args []string) { func init() { cleanupDeploymentCmd.Flags().StringVarP(&namespace, "namespace", "n", "", "k8s namespace") cleanupDeploymentCmd.Flags().BoolVar(&deleteNamespace, "delete-namespace", false, "if set, the namespace will be deleted") + cleanupDeploymentCmd.Flags().BoolVarP(&yes, "yes", "y", false, "skip confirmation prompt") } func askForConfirmation(question string) (response bool, err error) { diff --git a/pkg/cmd/root.go b/pkg/cmd/root.go index 495a9b6..e23b45a 100644 --- a/pkg/cmd/root.go +++ b/pkg/cmd/root.go @@ -15,6 +15,7 @@ var secretKey string var secretKeyUsername string var secretName string var userEmail string +var yes bool var rootCmd = &cobra.Command{ Use: "rdu", diff --git a/pkg/k8s/client.go b/pkg/k8s/client.go index 6122682..d08b525 100644 --- a/pkg/k8s/client.go +++ b/pkg/k8s/client.go @@ -2,22 +2,33 @@ package k8s import ( "fmt" - "path/filepath" "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" - "k8s.io/client-go/util/homedir" ) -func GetClientset() (*kubernetes.Clientset, error) { - home := homedir.HomeDir() - if home == "" { - return nil, fmt.Errorf("could not determine home directory") +func getConfig() (*rest.Config, error) { + config, err := rest.InClusterConfig() + if err == nil { + return config, nil + } + + loadingRules := clientcmd.NewDefaultClientConfigLoadingRules() + configOverrides := &clientcmd.ConfigOverrides{} + kubeConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, configOverrides) + + config, err = kubeConfig.ClientConfig() + if err != nil { + return nil, fmt.Errorf("failed to load kubeconfig: %w", err) } - kubeconfig := filepath.Join(home, 
".kube", "config") - config, err := clientcmd.BuildConfigFromFlags("", kubeconfig) + return config, nil +} + +func GetClientset() (*kubernetes.Clientset, error) { + config, err := getConfig() if err != nil { return nil, err } @@ -26,13 +37,7 @@ func GetClientset() (*kubernetes.Clientset, error) { } func GetDynamicClient() (client *dynamic.DynamicClient, err error) { - home := homedir.HomeDir() - if home == "" { - return nil, fmt.Errorf("could not determine home directory") - } - - kubeconfig := filepath.Join(home, ".kube", "config") - config, err := clientcmd.BuildConfigFromFlags("", kubeconfig) + config, err := getConfig() if err != nil { return nil, err } From de430ceb9b7c65fdbcc6b6b997a7842456eacb7f Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Thu, 4 Dec 2025 13:52:38 +0100 Subject: [PATCH 07/13] update ci and DOckerfile --- .github/workflows/lint-cleanup-script.yml | 27 ++++++++++++++--------- ci-deployment-cleanup/Dockerfile | 8 +++++++ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/.github/workflows/lint-cleanup-script.yml b/.github/workflows/lint-cleanup-script.yml index 01e4317..cf78754 100644 --- a/.github/workflows/lint-cleanup-script.yml +++ b/.github/workflows/lint-cleanup-script.yml @@ -1,26 +1,33 @@ -name: Lint cleanup.sh script +name: Lint cleanup.py script on: push: paths: - - 'ci-deployment-cleanup/helm-chart/cleanup.sh' + - 'ci-deployment-cleanup/helm-chart/cleanup.py' pull_request: paths: - - 'ci-deployment-cleanup/helm-chart/cleanup.sh' + - 'ci-deployment-cleanup/helm-chart/cleanup.py' permissions: contents: read jobs: - shellcheck: + ruff: runs-on: ubuntu-24.04 steps: - name: Checkout uses: actions/checkout@v4 - - - name: Run ShellCheck - uses: ludeeus/action-shellcheck@master + + - name: Set up Python + uses: actions/setup-python@v5 with: - scandir: './ci-deployment-cleanup/helm-chart' - format: gcc - severity: error \ No newline at end of file + python-version: '3.11' + + - name: Install Ruff + run: pip install ruff + + - 
name: Run Ruff format check + run: ruff format --check ci-deployment-cleanup/helm-chart/cleanup.py + + - name: Run Ruff linting + run: ruff check ci-deployment-cleanup/helm-chart/cleanup.py \ No newline at end of file diff --git a/ci-deployment-cleanup/Dockerfile b/ci-deployment-cleanup/Dockerfile index 23688d0..0e6ade0 100644 --- a/ci-deployment-cleanup/Dockerfile +++ b/ci-deployment-cleanup/Dockerfile @@ -18,11 +18,19 @@ RUN apk add --no-cache \ ca-certificates \ jq \ openssl \ + python3 \ + py3-pip \ && ARCH=$(case $(uname -m) in x86_64) echo amd64;; aarch64) echo arm64;; *) echo amd64;; esac) \ && curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl" \ && chmod +x kubectl \ && mv kubectl /usr/local/bin/ +# Install Python dependencies +RUN pip3 install --no-cache-dir \ + python-dateutil \ + PyGithub \ + kubernetes + RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 \ && chmod 700 get_helm.sh \ && ./get_helm.sh \ From 2c4b82a65af6a76045a64d94c67a595c8521dbd2 Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Thu, 4 Dec 2025 13:55:36 +0100 Subject: [PATCH 08/13] fix: formatting, linting --- ci-deployment-cleanup/helm-chart/cleanup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci-deployment-cleanup/helm-chart/cleanup.py b/ci-deployment-cleanup/helm-chart/cleanup.py index 22ca85c..9562c2f 100755 --- a/ci-deployment-cleanup/helm-chart/cleanup.py +++ b/ci-deployment-cleanup/helm-chart/cleanup.py @@ -1,4 +1,5 @@ -import subprocess, shlex +import subprocess +import shlex import shutil import json import re From d7bdfd863f885e8f3f3d6883e4f52b644992f8b0 Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Thu, 4 Dec 2025 14:23:28 +0100 Subject: [PATCH 09/13] restore original kubeconfig loading functionality --- pkg/k8s/client.go | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/pkg/k8s/client.go 
b/pkg/k8s/client.go index d08b525..ffbecb3 100644 --- a/pkg/k8s/client.go +++ b/pkg/k8s/client.go @@ -2,26 +2,43 @@ package k8s import ( "fmt" + "os" + "path/filepath" "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/util/homedir" ) func getConfig() (*rest.Config, error) { + // Try in-cluster config first (for running inside K8s pods) config, err := rest.InClusterConfig() if err == nil { return config, nil } - loadingRules := clientcmd.NewDefaultClientConfigLoadingRules() - configOverrides := &clientcmd.ConfigOverrides{} - kubeConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, configOverrides) + // Check KUBECONFIG environment variable + kubeconfigPath := os.Getenv("KUBECONFIG") + if kubeconfigPath != "" { + config, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath) + if err != nil { + return nil, err + } + return config, nil + } - config, err = kubeConfig.ClientConfig() + // Fall back to default kubeconfig location + home := homedir.HomeDir() + if home == "" { + return nil, fmt.Errorf("could not determine home directory") + } + + kubeconfigPath = filepath.Join(home, ".kube", "config") + config, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath) if err != nil { - return nil, fmt.Errorf("failed to load kubeconfig: %w", err) + return nil, err } return config, nil From 19e47c8415a5bb9080325da717e37a7c8c4460e7 Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Thu, 4 Dec 2025 14:27:08 +0100 Subject: [PATCH 10/13] restore original variables --- pkg/k8s/client.go | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pkg/k8s/client.go b/pkg/k8s/client.go index ffbecb3..9f97cec 100644 --- a/pkg/k8s/client.go +++ b/pkg/k8s/client.go @@ -13,30 +13,27 @@ import ( ) func getConfig() (*rest.Config, error) { - // Try in-cluster config first (for running inside K8s pods) config, err := rest.InClusterConfig() if err == nil { return 
config, nil } - // Check KUBECONFIG environment variable - kubeconfigPath := os.Getenv("KUBECONFIG") - if kubeconfigPath != "" { - config, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath) + kubeconfig := os.Getenv("KUBECONFIG") + if kubeconfig != "" { + config, err = clientcmd.BuildConfigFromFlags("", kubeconfig) if err != nil { return nil, err } return config, nil } - // Fall back to default kubeconfig location home := homedir.HomeDir() if home == "" { return nil, fmt.Errorf("could not determine home directory") } - kubeconfigPath = filepath.Join(home, ".kube", "config") - config, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath) + kubeconfig = filepath.Join(home, ".kube", "config") + config, err = clientcmd.BuildConfigFromFlags("", kubeconfig) if err != nil { return nil, err } From 8d8ad603456cf24f0c6a694d8b886554375764ab Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Thu, 4 Dec 2025 14:30:33 +0100 Subject: [PATCH 11/13] restore original variables --- pkg/k8s/client.go | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/pkg/k8s/client.go b/pkg/k8s/client.go index 9f97cec..d790b93 100644 --- a/pkg/k8s/client.go +++ b/pkg/k8s/client.go @@ -12,46 +12,45 @@ import ( "k8s.io/client-go/util/homedir" ) -func getConfig() (*rest.Config, error) { +func GetClientset() (*kubernetes.Clientset, error) { config, err := rest.InClusterConfig() if err == nil { - return config, nil + return kubernetes.NewForConfig(config) } kubeconfig := os.Getenv("KUBECONFIG") - if kubeconfig != "" { - config, err = clientcmd.BuildConfigFromFlags("", kubeconfig) - if err != nil { - return nil, err + if kubeconfig == "" { + home := homedir.HomeDir() + if home == "" { + return nil, fmt.Errorf("could not determine home directory") } - return config, nil - } - - home := homedir.HomeDir() - if home == "" { - return nil, fmt.Errorf("could not determine home directory") + kubeconfig = filepath.Join(home, ".kube", "config") } - 
kubeconfig = filepath.Join(home, ".kube", "config") config, err = clientcmd.BuildConfigFromFlags("", kubeconfig) if err != nil { return nil, err } - return config, nil + return kubernetes.NewForConfig(config) } -func GetClientset() (*kubernetes.Clientset, error) { - config, err := getConfig() - if err != nil { - return nil, err +func GetDynamicClient() (client *dynamic.DynamicClient, err error) { + config, err := rest.InClusterConfig() + if err == nil { + return dynamic.NewForConfig(config) } - return kubernetes.NewForConfig(config) -} + kubeconfig := os.Getenv("KUBECONFIG") + if kubeconfig == "" { + home := homedir.HomeDir() + if home == "" { + return nil, fmt.Errorf("could not determine home directory") + } + kubeconfig = filepath.Join(home, ".kube", "config") + } -func GetDynamicClient() (client *dynamic.DynamicClient, err error) { - config, err := getConfig() + config, err = clientcmd.BuildConfigFromFlags("", kubeconfig) if err != nil { return nil, err } From 579d6800fbc4f62805f94647e169638862d2d159 Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Thu, 4 Dec 2025 14:55:16 +0100 Subject: [PATCH 12/13] update base layer --- ci-deployment-cleanup/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci-deployment-cleanup/Dockerfile b/ci-deployment-cleanup/Dockerfile index 0e6ade0..6049ae8 100644 --- a/ci-deployment-cleanup/Dockerfile +++ b/ci-deployment-cleanup/Dockerfile @@ -1,6 +1,6 @@ FROM golang:1.24-alpine AS builder -RUN apk add --no-cache make bash +RUN apk add --no-cache make bash git WORKDIR /app From 16c70e55989d0ff29c7b4177cb090d022f32caa9 Mon Sep 17 00:00:00 2001 From: Wes Johnson Date: Thu, 4 Dec 2025 15:11:05 +0100 Subject: [PATCH 13/13] try fixing arm64 build --- ci-deployment-cleanup/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci-deployment-cleanup/Dockerfile b/ci-deployment-cleanup/Dockerfile index 6049ae8..b9222c9 100644 --- a/ci-deployment-cleanup/Dockerfile +++ 
b/ci-deployment-cleanup/Dockerfile @@ -1,6 +1,6 @@ FROM golang:1.24-alpine AS builder -RUN apk add --no-cache make bash git +RUN apk add --no-cache --no-scripts make bash git || apk fix WORKDIR /app