From c884cede5d4ff05a9f4ef856dc55a1cc326e5fc5 Mon Sep 17 00:00:00 2001 From: Alam-2U Date: Tue, 19 May 2026 09:34:54 +0000 Subject: [PATCH] fix: update migration script and add backfill command support --- forum/__init__.py | 2 +- forum/backends/mysql/models.py | 4 +- .../forum_backfill_author_username.py | 152 ++++++++++++++++++ .../commands/forum_backfill_closed_by.py | 95 +++++++++++ .../commands/forum_backfill_timestamps.py | 106 ++++++++++++ forum/migration_helpers.py | 37 +++-- ...009_alter_content_created_at_updated_at.py | 39 +++++ 7 files changed, 420 insertions(+), 15 deletions(-) create mode 100644 forum/management/commands/forum_backfill_author_username.py create mode 100644 forum/management/commands/forum_backfill_closed_by.py create mode 100644 forum/management/commands/forum_backfill_timestamps.py create mode 100644 forum/migrations/0009_alter_content_created_at_updated_at.py diff --git a/forum/__init__.py b/forum/__init__.py index b180362d..adc885aa 100644 --- a/forum/__init__.py +++ b/forum/__init__.py @@ -2,4 +2,4 @@ Openedx forum app. """ -__version__ = "0.6.2" +__version__ = "0.6.3" diff --git a/forum/backends/mysql/models.py b/forum/backends/mysql/models.py index 9c035a99..806b4292 100644 --- a/forum/backends/mysql/models.py +++ b/forum/backends/mysql/models.py @@ -134,10 +134,10 @@ class Content(models.Model): null=True ) created_at: models.DateTimeField[datetime, datetime] = models.DateTimeField( - auto_now_add=True + default=timezone.now ) updated_at: models.DateTimeField[datetime, datetime] = models.DateTimeField( - auto_now=True + default=timezone.now ) is_spam: models.BooleanField[bool, bool] = models.BooleanField( default=False, diff --git a/forum/management/commands/forum_backfill_author_username.py b/forum/management/commands/forum_backfill_author_username.py new file mode 100644 index 00000000..a6c9a9cd --- /dev/null +++ b/forum/management/commands/forum_backfill_author_username.py @@ -0,0 +1,152 @@ +"""Backfill author_username for threads/comments migrated before migration 0004. + +This command only updates author_username and retired_username (for threads) +where those fields are currently NULL. All other fields are left untouched. +""" + +from typing import Any + +from bson import ObjectId +from django.core.management.base import BaseCommand, CommandParser + +from forum.migration_helpers import get_user_or_none +from forum.models import Comment, CommentThread, MongoContent +from forum.mongo import get_database + + +class Command(BaseCommand): + """Backfill author_username/retired_username for rows that lack it.""" + + help = ( + "Backfill author_username (and retired_username) for CommentThreads and " + "Comments that were migrated before migration 0004 added those fields." + ) + + def add_arguments(self, parser: CommandParser) -> None: + """Add arguments to the command.""" + parser.add_argument( + "--dry-run", + action="store_true", + help="Print what would be updated without writing to the database.", + ) + + def handle(self, *args: str, **options: dict[str, Any]) -> None: + """Handle the command.""" + dry_run: bool = bool(options["dry_run"]) + db = get_database() + + if dry_run: + self.stdout.write("DRY RUN — no changes will be written.") + + thread_count = self._backfill_threads(db, dry_run) + comment_count = self._backfill_comments(db, dry_run) + + self.stdout.write( + self.style.SUCCESS( + f"Done. Threads updated: {thread_count}, Comments updated: {comment_count}" + ) + ) + + def _resolve_username( + self, + mongo_doc: dict[str, Any], + ) -> tuple[str | None, str | None]: + """ + Return (author_username, retired_username) using the same fallback logic + as the main migration: MongoDB value → retired_username → current username. + """ + author_username: str | None = mongo_doc.get("author_username") + retired_username: str | None = mongo_doc.get("retired_username") + + if not author_username: + if retired_username: + author_username = retired_username + else: + author = get_user_or_none(mongo_doc.get("author_id")) + if author: + author_username = author.username + + return author_username, retired_username + + def _backfill_threads(self, db: Any, dry_run: bool) -> int: + """Backfill CommentThreads with NULL author_username.""" + null_threads = CommentThread.objects.filter(author_username__isnull=True) + count = 0 + + for thread in null_threads.iterator(): + mapping = MongoContent.objects.filter( + content_object_id=thread.pk, + content_type=thread.content_type, + ).first() + if not mapping: + self.stderr.write( + f"No MongoContent mapping for CommentThread pk={thread.pk}, skipping." + ) + continue + + mongo_doc = db.contents.find_one({"_id": ObjectId(mapping.mongo_id)}) + if not mongo_doc: + self.stderr.write( + f"MongoDB document not found for mongo_id={mapping.mongo_id}, skipping." + ) + continue + + author_username, retired_username = self._resolve_username(mongo_doc) + if not author_username: + self.stderr.write( + f"Could not resolve author_username for CommentThread pk={thread.pk}, skipping." + ) + continue + + self.stdout.write( + f"CommentThread pk={thread.pk}: author_username={author_username!r}, " + f"retired_username={retired_username!r}" + ) + if not dry_run: + thread.author_username = author_username + thread.retired_username = retired_username + thread.save(update_fields=["author_username", "retired_username"]) + count += 1 + + return count + + def _backfill_comments(self, db: Any, dry_run: bool) -> int: + """Backfill Comments with NULL author_username.""" + null_comments = Comment.objects.filter(author_username__isnull=True) + count = 0 + + for comment in null_comments.iterator(): + mapping = MongoContent.objects.filter( + content_object_id=comment.pk, + content_type=comment.content_type, + ).first() + if not mapping: + self.stderr.write( + f"No MongoContent mapping for Comment pk={comment.pk}, skipping." + ) + continue + + mongo_doc = db.contents.find_one({"_id": ObjectId(mapping.mongo_id)}) + if not mongo_doc: + self.stderr.write( + f"MongoDB document not found for mongo_id={mapping.mongo_id}, skipping." + ) + continue + + author_username, retired_username = self._resolve_username(mongo_doc) + if not author_username: + self.stderr.write( + f"Could not resolve author_username for Comment pk={comment.pk}, skipping." + ) + continue + + self.stdout.write( + f"Comment pk={comment.pk}: author_username={author_username!r}" + ) + if not dry_run: + comment.author_username = author_username + comment.retired_username = retired_username + comment.save(update_fields=["author_username", "retired_username"]) + count += 1 + + return count diff --git a/forum/management/commands/forum_backfill_closed_by.py b/forum/management/commands/forum_backfill_closed_by.py new file mode 100644 index 00000000..0dd350f3 --- /dev/null +++ b/forum/management/commands/forum_backfill_closed_by.py @@ -0,0 +1,95 @@ +"""Backfill closed_by and close_reason_code for threads missing those values. + +This command targets CommentThread rows where closed=True but closed_by is NULL +(i.e. threads that were migrated before the migration script read these fields +from MongoDB). Only closed_by and close_reason_code are updated — all other +fields are left untouched. +""" + +from typing import Any + +from bson import ObjectId +from django.core.management.base import BaseCommand, CommandParser + +from forum.migration_helpers import get_user_or_none +from forum.models import CommentThread, MongoContent +from forum.mongo import get_database + + +class Command(BaseCommand): + """Backfill closed_by and close_reason_code for CommentThreads that lack them.""" + + help = ( + "Backfill closed_by and close_reason_code for closed CommentThreads " + "that were migrated before the migration script handled those fields." + ) + + def add_arguments(self, parser: CommandParser) -> None: + """Add arguments to the command.""" + parser.add_argument( + "--dry-run", + action="store_true", + help="Print what would be updated without writing to the database.", + ) + + def handle(self, *args: str, **options: dict[str, Any]) -> None: + """Handle the command.""" + dry_run: bool = bool(options["dry_run"]) + db = get_database() + + if dry_run: + self.stdout.write("DRY RUN — no changes will be written.") + + # Target threads that are closed but have no closed_by recorded + null_closed_by_threads = CommentThread.objects.filter( + closed=True, + closed_by__isnull=True, + ) + count = 0 + + for thread in null_closed_by_threads.iterator(): + mapping = MongoContent.objects.filter( + content_object_id=thread.pk, + content_type=thread.content_type, + ).first() + if not mapping: + self.stderr.write( + f"No MongoContent mapping for CommentThread pk={thread.pk}, skipping." + ) + continue + + mongo_doc = db.contents.find_one({"_id": ObjectId(mapping.mongo_id)}) + if not mongo_doc: + self.stderr.write( + f"MongoDB document not found for mongo_id={mapping.mongo_id}, skipping." + ) + continue + + closed_by_id = mongo_doc.get("closed_by_id") + close_reason_code = mongo_doc.get("close_reason_code") + + if not closed_by_id: + # Thread is closed in MySQL but MongoDB has no closed_by_id — + # nothing to backfill, skip silently. + continue + + closed_by = get_user_or_none(closed_by_id) + if not closed_by: + self.stderr.write( + f"User closed_by_id={closed_by_id} not found in MySQL for " + f"CommentThread pk={thread.pk}, skipping." + ) + continue + + self.stdout.write( + f"CommentThread pk={thread.pk}: " + f"closed_by={closed_by.username!r}, " + f"close_reason_code={close_reason_code!r}" + ) + if not dry_run: + thread.closed_by = closed_by + thread.close_reason_code = close_reason_code + thread.save(update_fields=["closed_by_id", "close_reason_code"]) + count += 1 + + self.stdout.write(self.style.SUCCESS(f"Done. CommentThreads updated: {count}")) diff --git a/forum/management/commands/forum_backfill_timestamps.py b/forum/management/commands/forum_backfill_timestamps.py new file mode 100644 index 00000000..ef77ad2e --- /dev/null +++ b/forum/management/commands/forum_backfill_timestamps.py @@ -0,0 +1,106 @@ +"""Backfill created_at and updated_at for threads/comments migrated before +migration 0009 removed auto_now_add/auto_now from the Content model. + +Before 0009, Django's auto_now_add/auto_now always overwrote any value passed +during create/save, so all migrated content ended up with the migration run +time instead of the original MongoDB timestamps. + +This command reads the original timestamps from MongoDB and writes them back +using QuerySet.update(), which bypasses auto_now* even on older schema versions. +Only created_at and updated_at are touched — all other fields are left as-is. +""" + +from typing import Any + +from bson import ObjectId +from django.core.management.base import BaseCommand, CommandParser + +from forum.migration_helpers import parse_mongo_datetime +from forum.models import Comment, CommentThread, MongoContent +from forum.mongo import get_database + + +class Command(BaseCommand): + """Backfill created_at/updated_at from MongoDB for already-migrated content.""" + + help = ( + "Backfill created_at and updated_at for CommentThreads and Comments whose " + "timestamps were set to migration time instead of the original MongoDB value." + ) + + def add_arguments(self, parser: CommandParser) -> None: + """Add arguments to the command.""" + parser.add_argument( + "--dry-run", + action="store_true", + help="Print what would be updated without writing to the database.", + ) + + def handle(self, *args: str, **options: dict[str, Any]) -> None: + """Handle the command.""" + dry_run: bool = bool(options["dry_run"]) + db = get_database() + + if dry_run: + self.stdout.write("DRY RUN — no changes will be written.") + + thread_count = self._backfill(db, dry_run, CommentThread, "CommentThread") + comment_count = self._backfill(db, dry_run, Comment, "Comment") + + self.stdout.write( + self.style.SUCCESS( + f"Done. CommentThreads updated: {thread_count}, Comments updated: {comment_count}" + ) + ) + + def _backfill( + self, + db: Any, + dry_run: bool, + model_class: type[CommentThread] | type[Comment], + label: str, + ) -> int: + """Backfill created_at/updated_at for all records of a given model.""" + count = 0 + + for obj in model_class.objects.iterator(): + mapping = MongoContent.objects.filter( + content_object_id=obj.pk, + content_type=obj.content_type, + ).first() + if not mapping: + self.stderr.write( + f"No MongoContent mapping for {label} pk={obj.pk}, skipping." + ) + continue + + mongo_doc = db.contents.find_one({"_id": ObjectId(mapping.mongo_id)}) + if not mongo_doc: + self.stderr.write( + f"MongoDB document not found for mongo_id={mapping.mongo_id}, skipping." + ) + continue + + created_at = parse_mongo_datetime(mongo_doc.get("created_at")) + updated_at = parse_mongo_datetime(mongo_doc.get("updated_at")) + + if not created_at or not updated_at: + self.stderr.write( + f"{label} pk={obj.pk}: missing timestamps in MongoDB, skipping." + ) + continue + + self.stdout.write( + f"{label} pk={obj.pk}: " + f"created_at={created_at.isoformat()}, " + f"updated_at={updated_at.isoformat()}" + ) + if not dry_run: + # Use QuerySet.update() — bypasses auto_now* at the SQL level. + model_class.objects.filter(pk=obj.pk).update( + created_at=created_at, + updated_at=updated_at, + ) + count += 1 + + return count diff --git a/forum/migration_helpers.py b/forum/migration_helpers.py index b5ecebde..7cad1a7d 100644 --- a/forum/migration_helpers.py +++ b/forum/migration_helpers.py @@ -1,8 +1,8 @@ """Migration commands helper methods.""" -from typing import Any import logging from datetime import datetime +from typing import Any from django.contrib.auth.models import User # pylint: disable=E5142 from django.core.management.base import OutputWrapper @@ -24,8 +24,7 @@ Subscription, UserVote, ) -from forum.utils import make_aware, get_trunc_title - +from forum.utils import get_trunc_title, make_aware logger = logging.getLogger(__name__) @@ -105,6 +104,23 @@ def migrate_content(db: Database[dict[str, Any]], course_id: str) -> None: migrate_subscriptions(db, content["_id"]) +def _resolve_thread_actors( + thread_data: dict[str, Any], +) -> tuple["User | None", "User | None"]: + """Return (deleted_by, closed_by) users resolved from thread_data.""" + deleted_by = ( + get_user_or_none(thread_data["deleted_by"]) + if thread_data.get("deleted_by") + else None + ) + closed_by = ( + get_user_or_none(thread_data["closed_by_id"]) + if thread_data.get("closed_by_id") + else None + ) + return deleted_by, closed_by + + def create_or_update_thread(thread_data: dict[str, Any]) -> None: """Create or update a thread.""" author = get_user_or_none(thread_data["author_id"]) @@ -129,10 +145,7 @@ def create_or_update_thread(thread_data: dict[str, Any]) -> None: mongo_id=mongo_thread_id, ) if not mongo_content.content_object_id: - # Get deleted_by user if deleted_by field exists in MongoDB - deleted_by = None - if thread_data.get("deleted_by"): - deleted_by = get_user_or_none(thread_data["deleted_by"]) + deleted_by, closed_by = _resolve_thread_actors(thread_data) thread = CommentThread.objects.create( author=author, @@ -146,6 +159,8 @@ def create_or_update_thread(thread_data: dict[str, Any]) -> None: anonymous=thread_data.get("anonymous", False), anonymous_to_peers=thread_data.get("anonymous_to_peers", False), closed=thread_data.get("closed", False), + closed_by=closed_by, + close_reason_code=thread_data.get("close_reason_code"), pinned=thread_data.get("pinned", False), created_at=parse_mongo_datetime(thread_data["created_at"]), updated_at=parse_mongo_datetime(thread_data["updated_at"]), @@ -165,10 +180,7 @@ def create_or_update_thread(thread_data: dict[str, Any]) -> None: # Update existing thread with latest data from MongoDB thread = CommentThread.objects.get(pk=mongo_content.content_object_id) - # Get deleted_by user if needed - deleted_by = None - if thread_data.get("deleted_by"): - deleted_by = get_user_or_none(thread_data["deleted_by"]) + deleted_by, closed_by = _resolve_thread_actors(thread_data) # Update all fields that might have changed thread.title = get_trunc_title(thread_data.get("title", "")) @@ -178,6 +190,8 @@ def create_or_update_thread(thread_data: dict[str, Any]) -> None: thread.anonymous = thread_data.get("anonymous", False) thread.anonymous_to_peers = thread_data.get("anonymous_to_peers", False) thread.closed = thread_data.get("closed", False) + thread.closed_by = closed_by # type: ignore[assignment] + thread.close_reason_code = thread_data.get("close_reason_code") thread.pinned = thread_data.get("pinned", False) thread.updated_at = parse_mongo_datetime(thread_data["updated_at"]) # type: ignore[assignment] thread.last_activity_at = parse_mongo_datetime(thread_data["last_activity_at"]) @@ -554,7 +568,6 @@ def log_deletion( def enable_mysql_backend_for_course(course_id: str) -> None: """Enable MySQL backend waffle flag for a course.""" from opaque_keys.edx.keys import CourseKey - from openedx.core.djangoapps.waffle_utils.models import ( # type: ignore[import-not-found] WaffleFlagCourseOverrideModel, ) diff --git a/forum/migrations/0009_alter_content_created_at_updated_at.py b/forum/migrations/0009_alter_content_created_at_updated_at.py new file mode 100644 index 00000000..b599d8ff --- /dev/null +++ b/forum/migrations/0009_alter_content_created_at_updated_at.py @@ -0,0 +1,39 @@ +# Generated migration to fix created_at/updated_at on Content so that +# historical timestamps can be preserved during MongoDB → MySQL migration. +# Removes auto_now_add/auto_now behaviour and replaces with a plain default +# so the migration script (and the backfill command) can write correct values. + +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("forum", "0008_discussionmuterecord_and_more"), + ] + + operations = [ + # Comment + migrations.AlterField( + model_name="comment", + name="created_at", + field=models.DateTimeField(default=django.utils.timezone.now), + ), + migrations.AlterField( + model_name="comment", + name="updated_at", + field=models.DateTimeField(default=django.utils.timezone.now), + ), + # CommentThread + migrations.AlterField( + model_name="commentthread", + name="created_at", + field=models.DateTimeField(default=django.utils.timezone.now), + ), + migrations.AlterField( + model_name="commentthread", + name="updated_at", + field=models.DateTimeField(default=django.utils.timezone.now), + ), + ]