From 340bb120f25026913fa4c13194b917fa665e3564 Mon Sep 17 00:00:00 2001 From: Emil Walter Date: Thu, 5 Jun 2025 09:37:27 +0200 Subject: [PATCH] feat: implemented a hard-delete version of the archival script --- delete-old-scans/README.md | 150 ++++++++++++++++++ delete-old-scans/delete_old_scans.py | 227 +++++++++++++++++++++++++++ delete-old-scans/requirements.txt | 6 + 3 files changed, 383 insertions(+) create mode 100644 delete-old-scans/README.md create mode 100755 delete-old-scans/delete_old_scans.py create mode 100644 delete-old-scans/requirements.txt diff --git a/delete-old-scans/README.md b/delete-old-scans/README.md new file mode 100644 index 0000000..4a187e4 --- /dev/null +++ b/delete-old-scans/README.md @@ -0,0 +1,150 @@ +# Delete Old Scans + +This script finds and **permanently deletes** old scans from the FossID Workbench that haven't been updated in a specified number of days. + +## ⚠️ Important Warning + +**This script PERMANENTLY DELETES scans from your FossID Workbench instance. Unlike archiving, deleted scans cannot be recovered. Use with extreme caution!** + +## Features + +- Lists all scans and identifies those not updated within a specified timeframe +- Displays scan information in a formatted table including project name, scan name, age, and last modified date +- Supports dry-run mode to preview which scans would be deleted without actually deleting them +- Skips already archived scans +- Requires explicit confirmation before proceeding with deletion +- Comprehensive logging and error handling + +## Prerequisites + +- Python 3.6 or higher +- Access to a FossID Workbench instance +- Valid Workbench API credentials (username and token) + +## Installation + +1. Clone or download this script +2. 
Install the required dependencies: + +```bash +pip install -r requirements.txt +``` + +## Configuration + +You can provide the Workbench connection details in two ways: + +### Option 1: Command Line Arguments +```bash +python delete_old_scans.py --workbench-url "https://your-workbench.com" --workbench-user "your-username" --workbench-token "your-api-token" +``` + +### Option 2: Environment Variables +Set the following environment variables: +- `WORKBENCH_URL`: Your FossID Workbench URL +- `WORKBENCH_USER`: Your Workbench username +- `WORKBENCH_TOKEN`: Your Workbench API token + +```bash +export WORKBENCH_URL="https://your-workbench.com" +export WORKBENCH_USER="your-username" +export WORKBENCH_TOKEN="your-api-token" +``` + +## Usage + +### Basic Usage +Delete scans that have not been updated in more than 365 days (default): +```bash +python delete_old_scans.py +``` + +### Specify Custom Age Threshold +Delete scans that have not been updated in more than 180 days: +```bash +python delete_old_scans.py --days 180 +``` + +### Dry Run Mode (Recommended First Step) +Preview which scans would be deleted without actually deleting them: +```bash +python delete_old_scans.py --days 180 --dry-run +``` + +### Complete Example +```bash +python delete_old_scans.py \ + --workbench-url "https://your-workbench.com" \ + --workbench-user "your-username" \ + --workbench-token "your-api-token" \ + --days 90 \ + --dry-run +``` + +## Command Line Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--workbench-url` | The Workbench API URL | From `WORKBENCH_URL` env var | +| `--workbench-user` | Your Workbench username | From `WORKBENCH_USER` env var | +| `--workbench-token` | Your Workbench API token | From `WORKBENCH_TOKEN` env var | +| `--days` | Days since a scan's last update after which it is considered old | 365 | +| `--dry-run` | Preview mode - show what would be deleted without deleting | False | + +## Safety Features + +1. **Dry Run Mode**: Always test with `--dry-run` first to see what would be deleted +2. 
**Explicit Confirmation**: Requires typing "yes" (not just "y") to proceed with deletion +3. **Clear Warnings**: Multiple warnings about the permanent nature of deletion +4. **Archived Scan Protection**: Skips scans that are already archived +5. **Detailed Logging**: Comprehensive logging of all operations + +## Sample Output + +``` +2024-01-15 10:30:00 - INFO - Fetching scans from Workbench... +2024-01-15 10:30:02 - INFO - Finding scans last updated more than 180 days ago... +2024-01-15 10:30:05 - INFO - These scans will be PERMANENTLY DELETED: + +╒═══════════════════╤═══════════════════════╤═══════════════════╤═══════════════════════╕ +│ PROJECT NAME │ SCAN NAME │ SCAN AGE (days) │ LAST MODIFIED │ +╞═══════════════════╪═══════════════════════╪═══════════════════╪═══════════════════════╡ +│ Legacy Project │ old-scan-v1.0 │ 245 │ 2023-05-15 14:30:22 │ +│ Deprecated App │ unused-scan-2022 │ 312 │ 2023-03-08 09:15:45 │ +╘═══════════════════╧═══════════════════════╧═══════════════════╧═══════════════════════╛ + +⚠️ WARNING: This operation will PERMANENTLY DELETE the scans listed above! + Unlike archiving, deleted scans cannot be recovered. +Are you absolutely sure you want to proceed? (yes/no): +``` + +## Best Practices + +1. **Always start with dry-run**: Use `--dry-run` to preview what would be deleted +2. **Start with a larger age threshold**: Begin with a high `--days` value (e.g., 730) so that only the oldest scans match, then lower it gradually; a smaller value selects more scans, not fewer +3. **Review the output carefully**: Check the table of scans to be deleted before confirming +4. **Consider archiving first**: Use the archive script instead if you might need the scans later +5. **Back up important data**: Ensure any important scan results are backed up before deletion + +## Troubleshooting + +### Common Issues + +1. **Authentication Error**: Verify your Workbench URL, username, and API token +2. **Network Timeout**: Check your network connection and Workbench availability +3. 
**Permission Denied**: Ensure your user account has permission to delete scans + +### Error Messages + +- `Failed to retrieve scans from Workbench`: Check your connection details and network +- `Error deleting scan`: The scan may be in use or you may lack permissions +- `API call failed`: Network or authentication issue + +## Related Scripts + +- `archive-stale-scans/`: Archives old scans instead of deleting them (recommended alternative) +- `delete-scan/`: Deletes a specific scan by scan code + +## Support + +For issues or questions, please refer to the FossID Workbench API documentation or contact your system administrator. \ No newline at end of file diff --git a/delete-old-scans/delete_old_scans.py b/delete-old-scans/delete_old_scans.py new file mode 100755 index 0000000..aff9230 --- /dev/null +++ b/delete-old-scans/delete_old_scans.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python3 +""" +This script finds and deletes old scans from the FossID Workbench. + +It lists all scans, identifies the ones that have not been updated in a specified number +of days, and deletes them permanently. It supports a dry-run mode to display the scans +that would be deleted. Unlike archiving, deletion is permanent and cannot be undone. 
+""" + +import sys +import json +from datetime import datetime, timedelta +import logging +import argparse +import os +from typing import List, Tuple, Dict, Any + +import requests +from tabulate import tabulate + +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) + +# Create a session object for making requests +session = requests.Session() + + +def make_api_call(url: str, payload: Dict[str, Any]) -> Dict[str, Any]: + """Helper function to make API calls.""" + try: + logging.debug("Making API call with payload: %s", json.dumps(payload, indent=2)) + response = session.post(url, json=payload, timeout=10) + response.raise_for_status() + logging.debug("Received response: %s", response.text) + return response.json().get("data", {}) + except requests.exceptions.RequestException as e: + logging.error("API call failed: %s", str(e)) + raise + except json.JSONDecodeError as e: + logging.error("Failed to parse JSON response: %s", str(e)) + raise + + +def list_scans(url: str, username: str, token: str) -> Dict[str, Any]: + """List all scans.""" + payload = { + "group": "scans", + "action": "list_scans", + "data": {"username": username, "key": token}, + } + return make_api_call(url, payload) + + +def get_scan_info( + url: str, username: str, token: str, scan_code: str +) -> Dict[str, Any]: + """Get scan info for each scan.""" + payload = { + "group": "scans", + "action": "get_information", + "data": {"username": username, "key": token, "scan_code": scan_code}, + } + return make_api_call(url, payload) + + +def get_project_info( + url: str, username: str, token: str, project_code: str +) -> Dict[str, Any]: + """Get the project name for each scan's project code.""" + payload = { + "group": "projects", + "action": "get_information", + "data": {"username": username, "key": token, "project_code": project_code}, + } + return make_api_call(url, payload) + + +def delete_scan(url: str, username: str, token: str, 
scan_code: str) -> bool: + """Delete a scan permanently.""" + payload = { + "group": "scans", + "action": "delete", + "data": {"username": username, "key": token, "scan_code": scan_code}, + } + try: + response = session.post(url, json=payload, timeout=10) + response.raise_for_status() + return response.status_code == 200 + except requests.exceptions.RequestException as e: + logging.error("Error deleting scan %s: %s", scan_code, str(e)) + return False + + +def find_old_scans( + scans: Dict[str, Any], url: str, username: str, token: str, days: int +) -> List[Tuple[str, str, str, datetime, datetime]]: + """Find scans that were last updated before the specified days.""" + old_scans = [] + time_limit = datetime.now() - timedelta(days=days) + for scan_info in scans.values(): + scan_code = scan_info["code"] + scan_details = get_scan_info(url, username, token, scan_code) + if scan_details["is_archived"]: + continue + creation_date = datetime.strptime(scan_details["created"], "%Y-%m-%d %H:%M:%S") + update_date = datetime.strptime(scan_details["updated"], "%Y-%m-%d %H:%M:%S") + if update_date < time_limit: + project_code = scan_details.get("project_code") + project_name = "No Project" + if project_code: + project_info = get_project_info(url, username, token, project_code) + project_name = project_info.get("project_name", "Unknown Project") + old_scans.append( + ( + project_name, + scan_details["name"], + scan_code, + creation_date, + update_date, + ) + ) + return old_scans + + +def display_scans(scans: List[Tuple[str, str, str, datetime, datetime]], dry_run: bool): + """Display scans that would be deleted.""" + if dry_run: + logging.info("Dry Run enabled! 
These scans would be PERMANENTLY DELETED:") + else: + logging.info("These scans will be PERMANENTLY DELETED:") + headers = ["PROJECT NAME", "SCAN NAME", "SCAN AGE (days)", "LAST MODIFIED"] + table = [ + [project_name, scan_name, (datetime.now() - update_date).days, update_date] + for project_name, scan_name, _, _, update_date in scans + ] + print(tabulate(table, headers, tablefmt="fancy_grid")) + + +def fetch_and_find_old_scans( + url: str, username: str, token: str, days: int +) -> List[Tuple[str, str, str, datetime, datetime]]: + """Fetch scans and find the ones that are older than the specified number of days.""" + logging.info("Fetching scans from Workbench...") + try: + scans = list_scans(url, username, token) + except requests.exceptions.RequestException as e: + logging.error("Failed to retrieve scans from Workbench: %s", str(e)) + logging.error("Please double-check the Workbench URL, Username, and Token.") + sys.exit(1) + logging.info("Finding scans last updated more than %d days ago...", days) + return find_old_scans(scans, url, username, token, days) + + +def delete_scans( + url: str, + username: str, + token: str, + scans: List[Tuple[str, str, str, datetime, datetime]], +): + """Delete the specified scans permanently.""" + for project_name, scan_name, scan_code, _, _ in scans: + logging.info("Deleting scan: %s (%s)", scan_name, project_name) + if delete_scan(url, username, token, scan_code): + logging.info("Successfully deleted scan: %s", scan_name) + else: + logging.error("Failed to delete scan: %s", scan_name) + + +def main(url: str, username: str, token: str, days: int, dry_run: bool): + """Main function to delete old scans.""" + old_scans = fetch_and_find_old_scans(url, username, token, days) + if not old_scans: + logging.info("No scans were last updated more than %d days ago. 
Exiting.", days) + return + + display_scans(old_scans, dry_run) + + if dry_run: + return + + print("\n⚠️ WARNING: This operation will PERMANENTLY DELETE the scans listed above!") + print(" Unlike archiving, deleted scans cannot be recovered.") + confirmation = input("Are you absolutely sure you want to proceed? (yes/no): ") + if confirmation.lower() != "yes": + logging.info("Operation cancelled.") + return + + delete_scans(url, username, token, old_scans) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Delete old scans permanently.") + parser.add_argument("--workbench-url", type=str, help="The Workbench API URL") + parser.add_argument("--workbench-user", type=str, help="Your Workbench username") + parser.add_argument("--workbench-token", type=str, help="Your Workbench API token") + parser.add_argument( + "--days", + type=int, + default=365, + help="Scan age in days to consider old (default: 365)", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Display scans that would be deleted without actually deleting them", + ) + + args = parser.parse_args() + + api_url = args.workbench_url or os.getenv("WORKBENCH_URL") + api_username = args.workbench_user or os.getenv("WORKBENCH_USER") + api_token = args.workbench_token or os.getenv("WORKBENCH_TOKEN") + + if not api_url or not api_username or not api_token: + logging.error( + "The Workbench URL, username, and token must be provided either as arguments\n" + "or environment variables." 
+ ) + sys.exit(1) + + # Sanity check for Workbench URL + if not api_url.endswith("/api.php"): + api_url += "/api.php" + + main(api_url, api_username, api_token, args.days, args.dry_run) \ No newline at end of file diff --git a/delete-old-scans/requirements.txt b/delete-old-scans/requirements.txt new file mode 100644 index 0000000..0615df6 --- /dev/null +++ b/delete-old-scans/requirements.txt @@ -0,0 +1,6 @@ +requests +python-dotenv +pycodestyle +pylint +black +tabulate \ No newline at end of file