diff --git a/util/jenkins/retired_user_cert_remover/retired_user_cert_remover.py b/util/jenkins/retired_user_cert_remover/retired_user_cert_remover.py index b3be873fe..f92d929ab 100644 --- a/util/jenkins/retired_user_cert_remover/retired_user_cert_remover.py +++ b/util/jenkins/retired_user_cert_remover/retired_user_cert_remover.py @@ -1,121 +1,135 @@ """ -Script to delete downloadable certificates of inactive users from S3, based on RDS MySQL database entries. +Script to delete downloadable certificates of inactive users from S3 by calling +the LMS retire_certs_s3 API endpoint. -Usage: - python retired_user_cert_remover.py --db-host=my-db-host --db-name=my-db --dry-run +This script no longer connects directly to RDS. All certificate discovery, S3 +deletion, and database updates are handled by the LMS API endpoint: + POST /api/certificates/v1/retire_certs_s3 + +The LMS endpoint requires an OAuth token obtained by exchanging client_id / +client_secret (stored in AWS Secrets Manager) for a bearer token. -Arguments: - --db-host The RDS database host. - --db-name The database name. - --dry-run Run the script in dry-run mode (logs actions without deleting). - --db-user The RDS database user (also settable via DB_USER env var). - --db-password The RDS database password (also settable via DB_PASSWORD env var). +Usage: + python retired_user_cert_remover.py \ + --lms-host=https://lms.example.com \ + --client-id= \ + --client-secret= \ + [--dry-run] Environment Variables: - DB_USER Database username (alternative to --db-user). - DB_PASSWORD Database password (alternative to --db-password). - -Functionality: - - Connects to an RDS MySQL database and fetches certificates for inactive users. - - Targets only certificates with a valid download URL and status 'downloadable'. - - Deletes corresponding certificate files from S3 (verify and download locations). - - Supports dry-run mode to simulate deletions for review. - -Example: - export DB_USER=admin - export DB_PASSWORD=securepass - python retired_user_cert_remover.py --db-host=mydb.amazonaws.com --db-name=edxapp --dry-run + LMS_CLIENT_ID OAuth client id (alternative to --client-id). + LMS_CLIENT_SECRET OAuth client secret (alternative to --client-secret). + +Dry-run: + Passes ?dry_run=true to the API. The LMS logs what would be deleted without + making any changes to S3 or the database. """ -import boto3 -from botocore.exceptions import ClientError -import pymysql +import logging +import sys + import backoff import click -import sys -import logging +import requests + +MAX_TOKEN_ATTEMPTS = 3 +MAX_API_ATTEMPTS = 3 -MAX_TRIES = 5 -# Configure logging -LOGGER = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +LOGGER = logging.getLogger(__name__) -class S3BotoWrapper: - def __init__(self): - self.client = boto3.client("s3") +def get_oauth_token(lms_host, client_id, client_secret): + """ + Exchange client credentials for a bearer token via LMS DOT. + + Returns the access token string, or exits on failure. + """ + token_url = f'{lms_host.rstrip("/")}/oauth2/access_token/' + + @backoff.on_exception(backoff.expo, requests.RequestException, max_tries=MAX_TOKEN_ATTEMPTS) + def _request(): + response = requests.post( + token_url, + data={ + 'grant_type': 'client_credentials', + 'client_id': client_id, + 'client_secret': client_secret, + }, + timeout=30, + ) + response.raise_for_status() + return response.json()['access_token'] - @backoff.on_exception(backoff.expo, ClientError, max_tries=MAX_TRIES) - def delete_object(self, bucket, key): - return self.client.delete_object(Bucket=bucket, Key=key) + try: + token = _request() + LOGGER.info('Successfully obtained OAuth token from %s', token_url) + return token + except Exception as exc: + LOGGER.error('Failed to obtain OAuth token: %s', exc) + sys.exit(1) -def fetch_certificates_to_delete(db_host, db_user, db_password, db_name): +def call_retire_certs_api(lms_host, token, dry_run): + """ + Call POST /api/certificates/v1/retire_certs_s3 on the LMS. + + Returns the parsed JSON response body. + Retries up to MAX_API_ATTEMPTS times on transient network errors. + Exits with code 1 if the call fails entirely after retries. + Exits with code 2 if the call returns 207 (partial failure). + """ + url = f'{lms_host.rstrip("/")}/api/certificates/v1/retire_certs_s3' + params = {'dry_run': 'true'} if dry_run else {} + headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'} + + @backoff.on_exception(backoff.expo, requests.RequestException, max_tries=MAX_API_ATTEMPTS) + def _request(): + response = requests.post(url, params=params, headers=headers, timeout=600) + # Retry on 5xx server errors; 2xx/207/4xx are handled below. + if response.status_code >= 500: + response.raise_for_status() + return response + + LOGGER.info('Calling %s (dry_run=%s)', url, dry_run) try: - connection = pymysql.connect(host=db_host, user=db_user, password=db_password, database=db_name) - cursor = connection.cursor() - logging.info("Running query on database...") - cursor.execute(""" - SELECT - au.id as "LMS_USER_ID", - gc.course_id as "COURSE_RUN_ID", - gc.id as "CERTIFICATE_ID", - gc.download_url as "CERTIFICATE_URL", - gc.download_uuid as "DOWNLOAD_UUID", - gc.verify_uuid as "VERIFY_UUID" - FROM - auth_user as au - JOIN - certificates_generatedcertificate as gc - ON - gc.user_id = au.id - WHERE - au.is_active = 0 - AND gc.download_url LIKE '%%https://%%' - AND gc.status = 'downloadable' - ORDER BY - LMS_USER_ID, - COURSE_RUN_ID; - """) - result = cursor.fetchall() - cursor.close() - connection.close() - return result - except Exception as ex: - logging.error(f"Database query failed with error: {ex}") + response = _request() + except requests.RequestException as exc: + LOGGER.error('HTTP request to retire_certs_s3 failed after retries: %s', exc) sys.exit(1) + body = {} + try: + body = response.json() + except ValueError: + pass + + if response.status_code == 200: + LOGGER.info('retire_certs_s3 completed successfully: %s', body) + return body -def delete_certificates_from_s3(certificates, dry_run): - s3_client = S3BotoWrapper() - for cert in certificates: - verify_uuid = cert[5] # VERIFY_UUID - download_uuid = cert[4] # DOWNLOAD_UUID + if response.status_code == 207: + LOGGER.warning( + 'retire_certs_s3 completed with partial failures: processed=%s failed=%s', + body.get('processed'), body.get('failed'), + ) + sys.exit(2) - verify_key = f"cert/{verify_uuid}" - download_key = f"downloads/{download_uuid}/Certificate.pdf" - try: - if dry_run: - logging.info(f"[Dry Run] Would delete {verify_key} from S3") - logging.info(f"[Dry Run] Would delete {download_key} from S3") - else: - logging.info(f"Deleting {verify_key} from S3...") - s3_client.delete_object("verify.edx.org", verify_key) - logging.info(f"Deleting {download_key} from S3...") - s3_client.delete_object("verify.edx.org", download_key) - except ClientError as e: - logging.error(f"Error deleting {verify_key} or {download_key}: {e}") + LOGGER.error( + 'retire_certs_s3 returned unexpected status %s: %s', + response.status_code, body, + ) + sys.exit(1) @click.command() -@click.option('--db-host', '-h', required=True, help='Database host') -@click.option('--db-user', envvar='DB_USER', required=True, help='Database user') -@click.option('--db-password', envvar='DB_PASSWORD', required=True, help='Database password') -@click.option('--db-name', '-db', required=True, help='Database name') -@click.option('--dry-run', is_flag=True, help='Run the script in dry-run mode without making any changes') -def controller(db_host, db_user, db_password, db_name, dry_run): - certificates = fetch_certificates_to_delete(db_host, db_user, db_password, db_name) - delete_certificates_from_s3(certificates, dry_run) +@click.option('--lms-host', required=True, help='Base URL of the LMS (e.g. https://lms.edx.org)') +@click.option('--client-id', envvar='LMS_CLIENT_ID', required=True, help='OAuth DOT client id') +@click.option('--client-secret', envvar='LMS_CLIENT_SECRET', required=True, help='OAuth DOT client secret') +@click.option('--dry-run', is_flag=True, help='Run in dry-run mode without making any changes') +def controller(lms_host, client_id, client_secret, dry_run): + token = get_oauth_token(lms_host, client_id, client_secret) + call_retire_certs_api(lms_host, token, dry_run) if __name__ == '__main__':