Added Pydantic validation to _get_next_version_number, compare_versions, and restore_version in routes; updated tests for the new error messages

be-smith · be-smith · commit 77b4f415d7f1 · 2025-11-14T12:52:21.000Z
diff --git a/pydatalab/src/pydatalab/models/versions.py b/pydatalab/src/pydatalab/models/versions.py
@@ -73,7 +73,7 @@ class VersionCounter(BaseModel):
     )
 
     class Config:
-        extra = "forbid"
+        extra = "ignore"  # Allow MongoDB's _id field and other internal fields
 
 
 class RestoreVersionRequest(BaseModel):
diff --git a/pydatalab/src/pydatalab/routes/v0_1/items.py b/pydatalab/src/pydatalab/routes/v0_1/items.py
@@ -22,7 +22,12 @@
 from pydatalab.models.people import Person
 from pydatalab.models.relationships import RelationshipType
 from pydatalab.models.utils import generate_unique_refcode
-from pydatalab.models.versions import VersionAction
+from pydatalab.models.versions import (
+    CompareVersionsQuery,
+    RestoreVersionRequest,
+    VersionAction,
+    VersionCounter,
+)
 from pydatalab.mongo import ITEMS_FTS_FIELDS, flask_mongo
 from pydatalab.permissions import PUBLIC_USER_ID, active_users_or_get_only, get_default_permissions
 
@@ -1060,7 +1065,20 @@ def _get_next_version_number(refcode: str) -> int:
         upsert=True,
         return_document=True,  # Return the document after update
     )
-    return result["counter"]
+
+    # Validate the result with Pydantic
+    try:
+        counter_doc = VersionCounter(**result)
+        return counter_doc.counter
+    except ValidationError as exc:
+        LOGGER.error(
+            "Version counter validation failed for refcode %s: %s",
+            refcode,
+            str(exc),
+        )
+        # Fallback: return raw counter value to prevent blocking saves
+        # This should only happen if the document is corrupted
+        return result["counter"]
 
 
 @ITEMS.route("/items/<refcode>/versions/", methods=["GET"])
@@ -1113,14 +1131,19 @@ def compare_versions(refcode):
     if len(refcode.split(":")) != 2:
         refcode = f"{CONFIG.IDENTIFIER_PREFIX}:{refcode}"
 
-    v1_id = request.args.get("v1")
-    v2_id = request.args.get("v2")
-    if not v1_id or not v2_id:
-        return jsonify({"status": "error", "message": "Both v1 and v2 must be provided"}), 400
+    # Validate query parameters using Pydantic model
+    try:
+        query_params = CompareVersionsQuery(
+            v1=request.args.get("v1", ""), v2=request.args.get("v2", "")
+        )
+    except ValidationError as exc:
+        return jsonify(
+            {"status": "error", "message": "Invalid query parameters", "errors": exc.errors()}
+        ), 400
 
     try:
-        v1_object_id = ObjectId(v1_id)
-        v2_object_id = ObjectId(v2_id)
+        v1_object_id = ObjectId(query_params.v1)
+        v2_object_id = ObjectId(query_params.v2)
     except (InvalidId, TypeError) as e:
         return jsonify({"status": "error", "message": f"Invalid version ID format: {str(e)}"}), 400
 
@@ -1168,15 +1191,20 @@ def restore_version(refcode):
     if len(refcode.split(":")) != 2:
         refcode = f"{CONFIG.IDENTIFIER_PREFIX}:{refcode}"
 
-    req = request.get_json()
-    version_id = req.get("version_id")
-    if not version_id:
-        return jsonify({"status": "error", "message": "version_id must be provided"}), 400
+    # Validate request body using Pydantic model
+    try:
+        restore_request = RestoreVersionRequest(**request.get_json())
+    except ValidationError as exc:
+        return jsonify(
+            {"status": "error", "message": "Invalid request body", "errors": exc.errors()}
+        ), 400
 
     try:
-        version_object_id = ObjectId(version_id)
+        version_object_id = ObjectId(restore_request.version_id)
     except (InvalidId, TypeError):
-        return jsonify({"status": "error", "message": f"Invalid version_id: {version_id}"}), 400
+        return jsonify(
+            {"status": "error", "message": f"Invalid version_id: {restore_request.version_id}"}
+        ), 400
 
     # Check permissions - user must have write access
     current_item = flask_mongo.db.items.find_one(
@@ -1279,7 +1307,7 @@ def restore_version(refcode):
     return jsonify(
         {
             "status": "success",
-            "restored_version": version_id,
+            "restored_version": restore_request.version_id,
             "new_version_number": next_version_number,
         }
     ), 200
diff --git a/pydatalab/tests/server/test_item_versions.py b/pydatalab/tests/server/test_item_versions.py
@@ -234,22 +234,43 @@ def test_compare_versions_missing_parameters(self, client, sample_with_version):
         """Test comparing versions with missing parameters."""
         refcode = sample_with_version.refcode.split(":")[1]
 
-        # Missing v2
+        # Missing v2 - the route passes "" as the default for absent params, which fails ObjectId validation
         response = client.get(f"/items/{refcode}/compare-versions/?v1=some_id")
         assert response.status_code == 400
-        assert "Both v1 and v2 must be provided" in response.json["message"]
-
-        # Missing v1
+        assert response.json["message"] == "Invalid query parameters"
+        assert "errors" in response.json
+        errors = response.json["errors"]
+        # Should have error for v2 (empty string is invalid ObjectId)
+        v2_errors = [e for e in errors if "v2" in str(e["loc"])]
+        assert len(v2_errors) == 1
+        assert "valid objectid" in v2_errors[0]["msg"].lower()
+
+        # Missing v1 - same behavior
         response = client.get(f"/items/{refcode}/compare-versions/?v2=some_id")
         assert response.status_code == 400
+        assert response.json["message"] == "Invalid query parameters"
+        assert "errors" in response.json
+        errors = response.json["errors"]
+        # Should have error for v1 (empty string is invalid ObjectId)
+        v1_errors = [e for e in errors if "v1" in str(e["loc"])]
+        assert len(v1_errors) == 1
+        assert "valid objectid" in v1_errors[0]["msg"].lower()
 
     def test_compare_versions_invalid_id(self, client, sample_with_version):
         """Test comparing versions with invalid ID format."""
         refcode = sample_with_version.refcode.split(":")[1]
         response = client.get(f"/items/{refcode}/compare-versions/?v1=invalid&v2=invalid")
 
         assert response.status_code == 400
-        assert "Invalid version ID format" in response.json["message"]
+        assert response.json["message"] == "Invalid query parameters"
+        # Check Pydantic's structured error response
+        assert "errors" in response.json
+        errors = response.json["errors"]
+        # Should have errors for both v1 and v2
+        assert len(errors) == 2
+        for error in errors:
+            assert error["loc"][0] in ["v1", "v2"]
+            assert "valid ObjectId" in error["msg"]
 
     def test_compare_versions_detects_changes(self, client, sample_with_version):
         """Test that compare_versions properly detects changes using DeepDiff."""
@@ -417,15 +438,27 @@ def test_restore_version_missing_version_id(self, client, sample_with_version):
         response = client.post(f"/items/{refcode}/restore-version/", json={})
 
         assert response.status_code == 400
-        assert "version_id must be provided" in response.json["message"]
+        assert response.json["message"] == "Invalid request body"
+        # Check Pydantic's structured error response
+        assert "errors" in response.json
+        errors = response.json["errors"]
+        assert len(errors) == 1
+        assert errors[0]["loc"] == ["version_id"]
+        assert "required" in errors[0]["msg"].lower()
 
     def test_restore_version_invalid_id(self, client, sample_with_version):
         """Test restoring with invalid version ID."""
         refcode = sample_with_version.refcode.split(":")[1]
         response = client.post(f"/items/{refcode}/restore-version/", json={"version_id": "invalid"})
 
         assert response.status_code == 400
-        assert "Invalid version_id" in response.json["message"]
+        assert response.json["message"] == "Invalid request body"
+        # Check Pydantic's structured error response
+        assert "errors" in response.json
+        errors = response.json["errors"]
+        assert len(errors) == 1
+        assert errors[0]["loc"] == ["version_id"]
+        assert "valid ObjectId" in errors[0]["msg"]
 
     def test_restore_version_nonexistent(self, client, sample_with_version):
         """Test restoring non-existent version."""

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -73,7 +73,7 @@ class VersionCounter(BaseModel):` |
| 73 | 73 | `)` |
| 74 | 74 | |
| 75 | 75 | `class Config:` |
| 76 | | `- extra = "forbid"` |
| | 76 | `+ extra = "ignore" # Allow MongoDB's _id field and other internal fields` |
| 77 | 77 | |
| 78 | 78 | |
| 79 | 79 | `class RestoreVersionRequest(BaseModel):` |