-
Notifications
You must be signed in to change notification settings - Fork 26
Adding version control to samples, starting materials and cells #1373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
3bc3283
28ad426
978ed05
ccf208e
c24b5e6
72e000a
e8d1f67
e3adc63
d7e66bc
c6229c9
f2d965f
5f294c7
1c8c79d
a767f64
86e1ca1
01a7d9d
8c1b43c
78934e6
144cba2
f5e8d21
41db7e9
1197376
0be43e6
6dd63c4
65a7476
3fc6ff4
3085336
9ca38db
d498f85
9f1de5a
7acf79b
51aa0d4
2dec696
cbbc9af
8a0fef3
c7bd6ba
ecfce07
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,9 @@ class HasRevisionControl(BaseModel): | |
| revisions: dict[int, Any] | None = None | ||
| """An optional mapping from old revision numbers to the model state at that revision.""" | ||
|
|
||
| version: int = 1 | ||
| """The version number used by the version control system for tracking snapshots.""" | ||
|
Comment on lines
+25
to
+26
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the same as |
||
|
|
||
|
|
||
| class HasBlocks(BaseModel): | ||
| blocks_obj: dict[str, DataBlockResponse] = Field({}) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,117 @@ | ||
| """Pydantic models for version control system.""" | ||
|
|
||
| from datetime import datetime | ||
| from enum import Enum | ||
|
|
||
| from pydantic import BaseModel, Field, validator | ||
|
|
||
| from pydatalab.models.utils import PyObjectId, Refcode | ||
|
|
||
|
|
||
| class VersionAction(str, Enum): | ||
| """Valid actions that can create a version snapshot.""" | ||
|
|
||
| CREATED = "created" | ||
| MANUAL_SAVE = "manual_save" | ||
| AUTO_SAVE = "auto_save" | ||
| RESTORED = "restored" | ||
|
|
||
|
|
||
| class ItemVersion(BaseModel): | ||
| """A complete snapshot of an item at a specific point in time. | ||
| This model represents a version entry in the `item_versions` collection. | ||
| Each version captures the complete state of an item, allowing users to | ||
| view history and restore previous states. | ||
| """ | ||
|
|
||
| refcode: Refcode = Field(..., description="The refcode of the item this version belongs to") | ||
| version: int = Field(..., ge=1, description="Sequential version number (1-indexed)") | ||
| timestamp: datetime = Field( | ||
| ..., description="When this version was created (ISO format with timezone)" | ||
| ) | ||
| action: VersionAction = Field( | ||
| ..., | ||
| description="The action that triggered this version: 'created' (item creation), " | ||
| "'manual_save' (user save), 'auto_save' (system save), or 'restored' (version restore)", | ||
| ) | ||
| user_id: PyObjectId | None = Field( | ||
| None, description="User's ObjectId for efficient querying and indexing" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Which user? This can be multivalued right? I don't think its any less efficient to query on nested fields like |
||
| ) | ||
| datalab_version: str = Field( | ||
| ..., description="Version of datalab-server that created this snapshot" | ||
| ) | ||
| data: dict = Field(..., description="Complete snapshot of the item data at this version") | ||
| restored_from_version: PyObjectId | None = Field( | ||
| None, | ||
| description="ObjectId of the version that was restored from (only present if action='restored')", | ||
| ) | ||
|
|
||
| @validator("restored_from_version") | ||
| def validate_restored_from_version(cls, v, values): | ||
| """Ensure restored_from_version is only present when action='restored'.""" | ||
| action = values.get("action") | ||
| if action == VersionAction.RESTORED and v is None: | ||
| raise ValueError("restored_from_version must be provided when action='restored'") | ||
| if action != VersionAction.RESTORED and v is not None: | ||
| raise ValueError( | ||
| f"restored_from_version should only be present when action='restored', got action='{action}'" | ||
| ) | ||
| return v | ||
|
|
||
|
|
||
| class VersionCounter(BaseModel): | ||
| """Atomic counter for tracking version numbers per item. | ||
| This model represents a document in the `version_counters` collection. | ||
| It ensures atomic increment of version numbers to prevent race conditions. | ||
| """ | ||
|
|
||
| refcode: Refcode = Field(..., description="The refcode this counter belongs to") | ||
| counter: int = Field( | ||
| 1, ge=1, description="Current version counter value (1-indexed, matches version numbers)" | ||
| ) | ||
|
|
||
| class Config: | ||
| extra = "ignore" # Allow MongoDB's _id field and other internal fields | ||
|
|
||
|
|
||
| class RestoreVersionRequest(BaseModel): | ||
| """Request body for restoring a version.""" | ||
|
|
||
| version_id: str = Field(..., description="ObjectId string of the version to restore to") | ||
|
|
||
| @validator("version_id") | ||
| def validate_version_id_format(cls, v): | ||
| """Validate that version_id is a valid ObjectId string.""" | ||
| try: | ||
| from bson import ObjectId | ||
|
|
||
| ObjectId(v) | ||
| except Exception as e: | ||
| raise ValueError(f"version_id must be a valid ObjectId string: {e}") | ||
| return v | ||
|
|
||
| class Config: | ||
| extra = "forbid" | ||
|
|
||
|
|
||
| class CompareVersionsQuery(BaseModel): | ||
| """Query parameters for comparing two versions.""" | ||
|
|
||
| v1: str = Field(..., description="ObjectId string of the first version") | ||
| v2: str = Field(..., description="ObjectId string of the second version") | ||
|
|
||
| @validator("v1", "v2") | ||
| def validate_version_ids(cls, v): | ||
| """Validate that version IDs are valid ObjectId strings.""" | ||
| try: | ||
| from bson import ObjectId | ||
|
|
||
| ObjectId(v) | ||
| except Exception as e: | ||
| raise ValueError(f"Version ID must be a valid ObjectId string: {e}") | ||
| return v | ||
|
|
||
| class Config: | ||
| extra = "forbid" | ||
|
Comment on lines
+63
to
+117
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fine in principle -- we should have pydantic models for requests, but we don't yet -- let's remember to move this somewhere better later on |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -116,6 +116,11 @@ def create_default_indices( | |
| - An index over item type, | ||
| - A unique index over `item_id` and `refcode`. | ||
| - A text index over user names and identities. | ||
| - Version control indexes: | ||
| - Index on item_versions.refcode for fast version history lookup | ||
| - Index on item_versions.user_id for fast user contribution queries | ||
| - Compound index on (refcode, version) for sorted version history | ||
| - Unique index on version_counters.refcode for atomic version numbering | ||
|
|
||
| Parameters: | ||
| background: If true, indexes will be created as background jobs. | ||
|
|
@@ -238,4 +243,16 @@ def create_group_fts(): | |
| db.users.drop_index(group_fts_name) | ||
| ret += create_group_fts() | ||
|
|
||
| # Version control indexes | ||
| ret += db.item_versions.create_index("refcode", name="version refcode", background=background) | ||
| ret += db.item_versions.create_index("user_id", name="version user_id", background=background) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See comment above about user IDs -- need to make sure we can handle multiple creators here, but I'm also not sure why we need fast querying by user (surely we always know the item ID when doing this) |
||
| ret += db.item_versions.create_index( | ||
| [("refcode", pymongo.ASCENDING), ("version", pymongo.DESCENDING)], | ||
| name="refcode and version", | ||
| background=background, | ||
| ) | ||
| ret += db.version_counters.create_index( | ||
| "refcode", unique=True, name="unique refcode counter", background=background | ||
| ) | ||
|
|
||
| return ret | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need to export any of these at the top level