From 9652830402a775e430d120abfe0668d24c8799d1 Mon Sep 17 00:00:00 2001 From: sinnec Date: Thu, 30 Mar 2023 16:05:02 +0300 Subject: [PATCH] Database Timezone aware --- whitebox/cron_tasks/monitoring_metrics.py | 4 ++-- whitebox/cron_tasks/shared.py | 15 ++++++++++++--- whitebox/entities/Alert.py | 6 +++--- whitebox/entities/DatasetRow.py | 4 ++-- whitebox/entities/DriftingMetric.py | 6 +++--- whitebox/entities/Inference.py | 6 +++--- whitebox/entities/Model.py | 4 ++-- whitebox/entities/ModelIntegrityMetric.py | 6 +++--- whitebox/entities/ModelMonitor.py | 4 ++-- whitebox/entities/PerformanceMetric.py | 18 +++++++++--------- whitebox/entities/User.py | 4 ++-- whitebox/schemas/user.py | 1 - whitebox/tests/unit_tests/test_unit.py | 18 +++++++++++------- whitebox/tests/v1/test_drifting_metrics.py | 6 +++--- 14 files changed, 57 insertions(+), 45 deletions(-) diff --git a/whitebox/cron_tasks/monitoring_metrics.py b/whitebox/cron_tasks/monitoring_metrics.py index cbc112b..1dfae43 100644 --- a/whitebox/cron_tasks/monitoring_metrics.py +++ b/whitebox/cron_tasks/monitoring_metrics.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone import pandas as pd import time from sqlalchemy import create_engine @@ -260,7 +260,7 @@ async def run_calculate_metrics_pipeline(): last_report_time = ( last_report.timestamp if last_report - else round_timestamp(datetime.utcnow(), "1D") + else round_timestamp(datetime.now(timezone.utc), "1D") ) unused_inference_rows_in_db = await get_unused_model_inference_rows( diff --git a/whitebox/cron_tasks/shared.py b/whitebox/cron_tasks/shared.py index 45e3f13..858c425 100644 --- a/whitebox/cron_tasks/shared.py +++ b/whitebox/cron_tasks/shared.py @@ -2,6 +2,7 @@ import itertools import pandas as pd import datetime +import pytz from sqlalchemy.orm import Session from whitebox import crud from whitebox.schemas.inferenceRow import InferenceRow @@ -43,7 +44,10 @@ async def group_inference_rows_by_timestamp( for x in dict_inference_rows: new_obj = {**x} new_obj["timestamp"] = change_timestamp( - x["timestamp"], last_time, granularity_amount, granularity_type + x["timestamp"], + last_time, + granularity_amount, + granularity_type, ) updated_inferences_dict.append(new_obj) @@ -159,7 +163,10 @@ def change_timestamp( (E.g. a timestamp 2023-03-03 12:33:25.34432 when granularity is set to 2D and the previous group's timestamp is \ 2023-03-03 00:00:00 will be converted into 2023-03-05 00:00:00)""" - timestamp_in_seconds = round_timestamp(timestamp, granularity_type).timestamp() + timestamp_utc_timezone = timestamp.replace(tzinfo=pytz.UTC) + timestamp_in_seconds = round_timestamp( + timestamp_utc_timezone, granularity_type + ).timestamp() granularity_in_seconds = convert_granularity_to_secs( granularity_amount, granularity_type @@ -175,7 +182,9 @@ def change_timestamp( time_difference + 1 ) * granularity_in_seconds + start_time_in_seconds - new_timestamp = datetime.datetime.fromtimestamp(new_timestamp_in_seconds) + new_timestamp = datetime.datetime.fromtimestamp( + new_timestamp_in_seconds + ).astimezone(datetime.timezone.utc) return new_timestamp diff --git a/whitebox/entities/Alert.py b/whitebox/entities/Alert.py index a43a379..2564d73 100644 --- a/whitebox/entities/Alert.py +++ b/whitebox/entities/Alert.py @@ -11,7 +11,7 @@ class Alert(Base): model_monitor_id = Column( String, ForeignKey("model_monitors.id", ondelete="CASCADE") ) - timestamp = Column(DateTime) + timestamp = Column(DateTime(timezone=True)) description = Column(String) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) diff --git a/whitebox/entities/DatasetRow.py b/whitebox/entities/DatasetRow.py index c7f9fbd..df3d62f 100644 --- a/whitebox/entities/DatasetRow.py +++ b/whitebox/entities/DatasetRow.py @@ -10,5 +10,5 @@ class DatasetRow(Base): model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE")) nonprocessed = Column(JSON) processed = Column(JSON) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) diff --git a/whitebox/entities/DriftingMetric.py b/whitebox/entities/DriftingMetric.py index 5496904..f5e097c 100644 --- a/whitebox/entities/DriftingMetric.py +++ b/whitebox/entities/DriftingMetric.py @@ -8,8 +8,8 @@ class DriftingMetric(Base): id = Column(String, primary_key=True, unique=True, default=generate_uuid) model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE")) - timestamp = Column(DateTime) + timestamp = Column(DateTime(timezone=True)) concept_drift_summary = Column(JSON) data_drift_summary = Column(JSON) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) diff --git a/whitebox/entities/Inference.py b/whitebox/entities/Inference.py index 2d0d6d4..aed2e53 100644 --- a/whitebox/entities/Inference.py +++ b/whitebox/entities/Inference.py @@ -8,11 +8,11 @@ class InferenceRow(Base): id = Column(String, primary_key=True, unique=True, default=generate_uuid) model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE")) - timestamp = Column(DateTime) + timestamp = Column(DateTime(timezone=True)) nonprocessed = Column(JSON) processed = Column(JSON) is_used = Column(Boolean) actual = Column(Float, nullable=True) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) diff --git a/whitebox/entities/Model.py b/whitebox/entities/Model.py index 2cae97d..d7556d7 100644 --- a/whitebox/entities/Model.py +++ b/whitebox/entities/Model.py @@ -15,8 +15,8 @@ class Model(Base): target_column = Column(String) granularity = Column(String) labels = Column(JSON, nullable=True) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) dataset_rows = relationship("DatasetRow") inference_rows = relationship("InferenceRow") diff --git a/whitebox/entities/ModelIntegrityMetric.py b/whitebox/entities/ModelIntegrityMetric.py index 277794a..5eac5fa 100644 --- a/whitebox/entities/ModelIntegrityMetric.py +++ b/whitebox/entities/ModelIntegrityMetric.py @@ -8,7 +8,7 @@ class ModelIntegrityMetric(Base): id = Column(String, primary_key=True, unique=True, default=generate_uuid) model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE")) - timestamp = Column(DateTime) + timestamp = Column(DateTime(timezone=True)) feature_metrics = Column(JSON) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) diff --git a/whitebox/entities/ModelMonitor.py b/whitebox/entities/ModelMonitor.py index a0b406d..a774def 100644 --- a/whitebox/entities/ModelMonitor.py +++ b/whitebox/entities/ModelMonitor.py @@ -17,7 +17,7 @@ class ModelMonitor(Base): lower_threshold = Column(Numeric, nullable=True) severity = Column("severity", Enum(AlertSeverity)) email = Column(String) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) alerts = relationship("Alert") diff --git a/whitebox/entities/PerformanceMetric.py b/whitebox/entities/PerformanceMetric.py index c2969d4..11ba522 100644 --- a/whitebox/entities/PerformanceMetric.py +++ b/whitebox/entities/PerformanceMetric.py @@ -8,7 +8,7 @@ class BinaryClassificationMetrics(Base): id = Column(String, primary_key=True, unique=True, default=generate_uuid) model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE")) - timestamp = Column(DateTime) + timestamp = Column(DateTime(timezone=True)) accuracy = Column(Float) precision = Column(Float) recall = Column(Float) @@ -17,8 +17,8 @@ class BinaryClassificationMetrics(Base): false_positive = Column(Integer) false_negative = Column(Integer) true_positive = Column(Integer) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) class MultiClassificationMetrics(Base): @@ -26,14 +26,14 @@ class MultiClassificationMetrics(Base): id = Column(String, primary_key=True, unique=True, default=generate_uuid) model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE")) - timestamp = Column(DateTime) + timestamp = Column(DateTime(timezone=True)) accuracy = Column(Float) precision = Column(JSON) recall = Column(JSON) f1 = Column(JSON) confusion_matrix = Column(JSON) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) class RegressionMetrics(Base): @@ -41,9 +41,9 @@ class RegressionMetrics(Base): id = Column(String, primary_key=True, unique=True, default=generate_uuid) model_id = Column(String, ForeignKey("models.id", ondelete="CASCADE")) - timestamp = Column(DateTime) + timestamp = Column(DateTime(timezone=True)) r_square = Column(Float) mean_squared_error = Column(Float) mean_absolute_error = Column(Float) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) diff --git a/whitebox/entities/User.py b/whitebox/entities/User.py index b0838a6..4b857e2 100644 --- a/whitebox/entities/User.py +++ b/whitebox/entities/User.py @@ -10,5 +10,5 @@ class User(Base): id = Column(String, unique=True, primary_key=True, default=generate_uuid) username = Column(String) api_key = Column(String) - created_at = Column(DateTime) - updated_at = Column(DateTime) + created_at = Column(DateTime(timezone=True)) + updated_at = Column(DateTime(timezone=True)) diff --git a/whitebox/schemas/user.py b/whitebox/schemas/user.py index 6ed25f2..19c3d28 100644 --- a/whitebox/schemas/user.py +++ b/whitebox/schemas/user.py @@ -1,4 +1,3 @@ -from typing import Optional, Union from pydantic import BaseModel from whitebox.schemas.base import ItemBase diff --git a/whitebox/tests/unit_tests/test_unit.py b/whitebox/tests/unit_tests/test_unit.py index 63ba3e7..663906c 100644 --- a/whitebox/tests/unit_tests/test_unit.py +++ b/whitebox/tests/unit_tests/test_unit.py @@ -1,17 +1,21 @@ from whitebox.cron_tasks.shared import change_timestamp -from datetime import datetime +from datetime import datetime, timezone class TestNodes: def test_round_timestamp(self): - timestamp = datetime(2023, 3, 7, 15, 34, 23) - start_time = datetime(2023, 3, 6) + timestamp = datetime(2023, 3, 7, 15, 34, 23, tzinfo=timezone.utc) + start_time = datetime(2023, 3, 6, tzinfo=timezone.utc) assert change_timestamp(timestamp, start_time, 15, "T") == datetime( - 2023, 3, 7, 15, 45 + 2023, 3, 7, 15, 45, tzinfo=timezone.utc ) assert change_timestamp(timestamp, start_time, 5, "H") == datetime( - 2023, 3, 7, 16, 0 + 2023, 3, 7, 16, 0, tzinfo=timezone.utc + ) + assert change_timestamp(timestamp, start_time, 2, "D") == datetime( + 2023, 3, 8, tzinfo=timezone.utc + ) + assert change_timestamp(timestamp, start_time, 1, "W") == datetime( + 2023, 3, 13, tzinfo=timezone.utc ) - assert change_timestamp(timestamp, start_time, 2, "D") == datetime(2023, 3, 8) - assert change_timestamp(timestamp, start_time, 1, "W") == datetime(2023, 3, 13) diff --git a/whitebox/tests/v1/test_drifting_metrics.py b/whitebox/tests/v1/test_drifting_metrics.py index 3939a94..8e16786 100644 --- a/whitebox/tests/v1/test_drifting_metrics.py +++ b/whitebox/tests/v1/test_drifting_metrics.py @@ -26,8 +26,8 @@ def test_drifting_metric_get_model_all(client, api_key): assert len(response_multi_json) == 1 assert len(response_binary_json) == 1 - assert response_multi_json[0]["timestamp"] == "2023-03-06T12:15:00" - assert response_binary_json[0]["timestamp"] == "2023-03-07T00:00:00" + assert response_multi_json[0]["timestamp"] == "2023-03-06T12:15:00+00:00" + assert response_binary_json[0]["timestamp"] == "2023-03-07T00:00:00+00:00" assert response_multi.status_code == status.HTTP_200_OK assert response_binary.status_code == status.HTTP_200_OK @@ -48,7 +48,7 @@ def test_drifting_metrics_get_binary_model_after_x_time(client, api_key): assert len(response_binary_json) == 1 - assert response_binary_json[0]["timestamp"] == "2023-03-07T00:00:00" + assert response_binary_json[0]["timestamp"] == "2023-03-07T00:00:00+00:00" assert response_binary.status_code == status.HTTP_200_OK