Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion forum/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Openedx forum app.
"""

__version__ = "0.6.2"
__version__ = "0.6.3"
4 changes: 2 additions & 2 deletions forum/backends/mysql/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,10 @@ class Content(models.Model):
null=True
)
created_at: models.DateTimeField[datetime, datetime] = models.DateTimeField(
auto_now_add=True
default=timezone.now
)
updated_at: models.DateTimeField[datetime, datetime] = models.DateTimeField(
auto_now=True
default=timezone.now
)
is_spam: models.BooleanField[bool, bool] = models.BooleanField(
default=False,
Expand Down
152 changes: 152 additions & 0 deletions forum/management/commands/forum_backfill_author_username.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""Backfill author_username for threads/comments migrated before migration 0004.

This command only updates author_username and retired_username (for threads)
where those fields are currently NULL. All other fields are left untouched.
"""

from typing import Any

from bson import ObjectId
from django.core.management.base import BaseCommand, CommandParser

from forum.migration_helpers import get_user_or_none
from forum.models import Comment, CommentThread, MongoContent
from forum.mongo import get_database


class Command(BaseCommand):
"""Backfill author_username/retired_username for rows that lack it."""

help = (
"Backfill author_username (and retired_username) for CommentThreads and "
"Comments that were migrated before migration 0004 added those fields."
)

def add_arguments(self, parser: CommandParser) -> None:
"""Add arguments to the command."""
parser.add_argument(
"--dry-run",
action="store_true",
help="Print what would be updated without writing to the database.",
)

def handle(self, *args: str, **options: dict[str, Any]) -> None:
"""Handle the command."""
dry_run: bool = bool(options["dry_run"])
db = get_database()

if dry_run:
self.stdout.write("DRY RUN — no changes will be written.")

thread_count = self._backfill_threads(db, dry_run)
comment_count = self._backfill_comments(db, dry_run)

self.stdout.write(
self.style.SUCCESS(
f"Done. Threads updated: {thread_count}, Comments updated: {comment_count}"
)
)

def _resolve_username(
self,
mongo_doc: dict[str, Any],
) -> tuple[str | None, str | None]:
"""
Return (author_username, retired_username) using the same fallback logic
as the main migration: MongoDB value → retired_username → current username.
"""
author_username: str | None = mongo_doc.get("author_username")
retired_username: str | None = mongo_doc.get("retired_username")

if not author_username:
if retired_username:
author_username = retired_username
else:
author = get_user_or_none(mongo_doc.get("author_id"))
if author:
author_username = author.username

return author_username, retired_username

def _backfill_threads(self, db: Any, dry_run: bool) -> int:
"""Backfill CommentThreads with NULL author_username."""
null_threads = CommentThread.objects.filter(author_username__isnull=True)
count = 0

for thread in null_threads.iterator():
mapping = MongoContent.objects.filter(
content_object_id=thread.pk,
content_type=thread.content_type,
).first()
if not mapping:
self.stderr.write(
f"No MongoContent mapping for CommentThread pk={thread.pk}, skipping."
)
continue

mongo_doc = db.contents.find_one({"_id": ObjectId(mapping.mongo_id)})
if not mongo_doc:
self.stderr.write(
f"MongoDB document not found for mongo_id={mapping.mongo_id}, skipping."
)
continue

author_username, retired_username = self._resolve_username(mongo_doc)
if not author_username:
self.stderr.write(
f"Could not resolve author_username for CommentThread pk={thread.pk}, skipping."
)
continue

self.stdout.write(
f"CommentThread pk={thread.pk}: author_username={author_username!r}, "
f"retired_username={retired_username!r}"
)
if not dry_run:
thread.author_username = author_username
thread.retired_username = retired_username
thread.save(update_fields=["author_username", "retired_username"])
count += 1

return count

def _backfill_comments(self, db: Any, dry_run: bool) -> int:
"""Backfill Comments with NULL author_username."""
null_comments = Comment.objects.filter(author_username__isnull=True)
count = 0

for comment in null_comments.iterator():
mapping = MongoContent.objects.filter(
content_object_id=comment.pk,
content_type=comment.content_type,
).first()
if not mapping:
self.stderr.write(
f"No MongoContent mapping for Comment pk={comment.pk}, skipping."
)
continue

mongo_doc = db.contents.find_one({"_id": ObjectId(mapping.mongo_id)})
if not mongo_doc:
self.stderr.write(
f"MongoDB document not found for mongo_id={mapping.mongo_id}, skipping."
)
continue

author_username, retired_username = self._resolve_username(mongo_doc)
if not author_username:
self.stderr.write(
f"Could not resolve author_username for Comment pk={comment.pk}, skipping."
)
continue

self.stdout.write(
f"Comment pk={comment.pk}: author_username={author_username!r}"
)
if not dry_run:
comment.author_username = author_username
comment.retired_username = retired_username
comment.save(update_fields=["author_username", "retired_username"])
count += 1

return count
95 changes: 95 additions & 0 deletions forum/management/commands/forum_backfill_closed_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""Backfill closed_by and close_reason_code for threads missing those values.

This command targets CommentThread rows where closed=True but closed_by is NULL
(i.e. threads that were migrated before the migration script read these fields
from MongoDB). Only closed_by and close_reason_code are updated — all other
fields are left untouched.
"""

from typing import Any

from bson import ObjectId
from django.core.management.base import BaseCommand, CommandParser

from forum.migration_helpers import get_user_or_none
from forum.models import CommentThread, MongoContent
from forum.mongo import get_database


class Command(BaseCommand):
"""Backfill closed_by and close_reason_code for CommentThreads that lack them."""

help = (
"Backfill closed_by and close_reason_code for closed CommentThreads "
"that were migrated before the migration script handled those fields."
)

def add_arguments(self, parser: CommandParser) -> None:
"""Add arguments to the command."""
parser.add_argument(
"--dry-run",
action="store_true",
help="Print what would be updated without writing to the database.",
)

def handle(self, *args: str, **options: dict[str, Any]) -> None:
"""Handle the command."""
dry_run: bool = bool(options["dry_run"])
db = get_database()

if dry_run:
self.stdout.write("DRY RUN — no changes will be written.")

# Target threads that are closed but have no closed_by recorded
null_closed_by_threads = CommentThread.objects.filter(
closed=True,
closed_by__isnull=True,
)
count = 0

for thread in null_closed_by_threads.iterator():
mapping = MongoContent.objects.filter(
content_object_id=thread.pk,
content_type=thread.content_type,
).first()
if not mapping:
self.stderr.write(
f"No MongoContent mapping for CommentThread pk={thread.pk}, skipping."
)
continue

mongo_doc = db.contents.find_one({"_id": ObjectId(mapping.mongo_id)})
if not mongo_doc:
self.stderr.write(
f"MongoDB document not found for mongo_id={mapping.mongo_id}, skipping."
)
continue

closed_by_id = mongo_doc.get("closed_by_id")
close_reason_code = mongo_doc.get("close_reason_code")

if not closed_by_id:
# Thread is closed in MySQL but MongoDB has no closed_by_id —
# nothing to backfill, skip silently.
continue

closed_by = get_user_or_none(closed_by_id)
if not closed_by:
self.stderr.write(
f"User closed_by_id={closed_by_id} not found in MySQL for "
f"CommentThread pk={thread.pk}, skipping."
)
continue

self.stdout.write(
f"CommentThread pk={thread.pk}: "
f"closed_by={closed_by.username!r}, "
f"close_reason_code={close_reason_code!r}"
)
if not dry_run:
thread.closed_by = closed_by
thread.close_reason_code = close_reason_code
thread.save(update_fields=["closed_by_id", "close_reason_code"])
count += 1

self.stdout.write(self.style.SUCCESS(f"Done. CommentThreads updated: {count}"))
106 changes: 106 additions & 0 deletions forum/management/commands/forum_backfill_timestamps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""Backfill created_at and updated_at for threads/comments migrated before
migration 0009 removed auto_now_add/auto_now from the Content model.

Before 0009, Django's auto_now_add/auto_now always overwrote any value passed
during create/save, so all migrated content ended up with the migration run
time instead of the original MongoDB timestamps.

This command reads the original timestamps from MongoDB and writes them back
using QuerySet.update(), which bypasses auto_now* even on older schema versions.
Only created_at and updated_at are touched — all other fields are left as-is.
"""

from typing import Any

from bson import ObjectId
from django.core.management.base import BaseCommand, CommandParser

from forum.migration_helpers import parse_mongo_datetime
from forum.models import Comment, CommentThread, MongoContent
from forum.mongo import get_database


class Command(BaseCommand):
"""Backfill created_at/updated_at from MongoDB for already-migrated content."""

help = (
"Backfill created_at and updated_at for CommentThreads and Comments whose "
"timestamps were set to migration time instead of the original MongoDB value."
)

def add_arguments(self, parser: CommandParser) -> None:
"""Add arguments to the command."""
parser.add_argument(
"--dry-run",
action="store_true",
help="Print what would be updated without writing to the database.",
)

def handle(self, *args: str, **options: dict[str, Any]) -> None:
"""Handle the command."""
dry_run: bool = bool(options["dry_run"])
db = get_database()

if dry_run:
self.stdout.write("DRY RUN — no changes will be written.")

thread_count = self._backfill(db, dry_run, CommentThread, "CommentThread")
comment_count = self._backfill(db, dry_run, Comment, "Comment")

self.stdout.write(
self.style.SUCCESS(
f"Done. CommentThreads updated: {thread_count}, Comments updated: {comment_count}"
)
)

def _backfill(
self,
db: Any,
dry_run: bool,
model_class: type[CommentThread] | type[Comment],
label: str,
) -> int:
"""Backfill created_at/updated_at for all records of a given model."""
count = 0

for obj in model_class.objects.iterator():
mapping = MongoContent.objects.filter(
content_object_id=obj.pk,
content_type=obj.content_type,
).first()
if not mapping:
self.stderr.write(
f"No MongoContent mapping for {label} pk={obj.pk}, skipping."
)
continue

mongo_doc = db.contents.find_one({"_id": ObjectId(mapping.mongo_id)})
if not mongo_doc:
self.stderr.write(
f"MongoDB document not found for mongo_id={mapping.mongo_id}, skipping."
)
continue

created_at = parse_mongo_datetime(mongo_doc.get("created_at"))
updated_at = parse_mongo_datetime(mongo_doc.get("updated_at"))

if not created_at or not updated_at:
self.stderr.write(
f"{label} pk={obj.pk}: missing timestamps in MongoDB, skipping."
)
continue

self.stdout.write(
f"{label} pk={obj.pk}: "
f"created_at={created_at.isoformat()}, "
f"updated_at={updated_at.isoformat()}"
)
if not dry_run:
# Use QuerySet.update() — bypasses auto_now* at the SQL level.
model_class.objects.filter(pk=obj.pk).update(
created_at=created_at,
updated_at=updated_at,
)
count += 1

return count
Loading
Loading