Changes from all commits
30 commits
e5cac94
Use requests.Session for all .get downloads
mdavis-xyz Jan 6, 2025
a609a56
Correct some function doc strings, add cache path to error message
mdavis-xyz Jan 25, 2025
76a92f3
Change registration list from xls to xlsx, fixes #60
mdavis-xyz Dec 6, 2025
e58ee35
Check HTTP status of each request
mdavis-xyz Dec 6, 2025
ba598de
Fix typo in README
mdavis-xyz Apr 2, 2026
ff2ca68
Update deprecated field in pyproject.toml
mdavis-xyz Apr 2, 2026
7e71e73
More robust checks and error messages for unset cache path
mdavis-xyz Apr 3, 2026
02f7f1e
update uv.lock
mdavis-xyz Apr 3, 2026
fea56a0
disable long tests
mdavis-xyz Apr 3, 2026
e0f10f2
tweak gitignore for creative venv names
mdavis-xyz Apr 3, 2026
64e99d0
Merge branch 'session' into matt-v2
mdavis-xyz Apr 3, 2026
9bc01c3
Merge branch 'xls-x-60' into matt-v2
mdavis-xyz Apr 3, 2026
9a06a8f
merge xls vs xlsx fix
mdavis-xyz Apr 3, 2026
397bdf8
Do not hard code cache path in assertions about error messages
mdavis-xyz Apr 7, 2026
cae6a4c
More detailed warning message
mdavis-xyz Apr 7, 2026
66189a8
Change start date from 2009 to 2015, due to nemweb folder change
mdavis-xyz Apr 7, 2026
2a5812f
Improve downloading (speed, robustness, logging, caching)
mdavis-xyz Apr 7, 2026
d097030
Do not catch KeyboardInterrupts in gui.py
mdavis-xyz Apr 7, 2026
238fd35
Fix broken test for raw_data_cache presence
mdavis-xyz Apr 7, 2026
e3dbef5
Skip FCAS_4_SECOND tests for now, after AEMO changed nemweb structure…
mdavis-xyz Apr 7, 2026
dd01b63
Skip tests which purge the cache (#16)
mdavis-xyz Apr 7, 2026
82f1dd0
Increase minimum Python version from 3.9 to 3.10, since 3.9 was depre…
mdavis-xyz Apr 7, 2026
1b57257
Add 1 second FCAS columns to DISPATCHPRICE (#22)
mdavis-xyz Apr 7, 2026
976b872
Create raw_data_cache if it does not yet exist
mdavis-xyz Apr 7, 2026
f2fbfd4
close excel file when done
mdavis-xyz Apr 8, 2026
0041a36
address deprecation warnings in Pandas
mdavis-xyz Apr 8, 2026
7753648
Set keep_csv=True by default for cache_compiler
mdavis-xyz Apr 8, 2026
0fe2964
Tidy up incomplete feather/parquet (#55)
mdavis-xyz Apr 8, 2026
34fecd4
Allow start_date to be dt.datetime (#53)
mdavis-xyz Apr 9, 2026
539670b
Allow passing date as start_time and end_time (#53)
mdavis-xyz Apr 9, 2026
1 change: 1 addition & 0 deletions .gitignore
@@ -1,6 +1,7 @@
.idea/*
src/nemosis/__pycache__/*
venv/*
venv*/*
build/*
dist/*
src/nemosis/build/*
13 changes: 8 additions & 5 deletions README.md
@@ -50,7 +50,7 @@ Choose the exe from the latest [release](https://github.com/UNSW-CEEM/NEMOSIS/re

## Contributing

Interested in contributing? Check out the [contributing instructions](./CONTRIBUTING. md), which also includes steps to install `nemosis` for development.
Interested in contributing? Check out the [contributing instructions](./CONTRIBUTING.md), which also includes steps to install `nemosis` for development.

Please note that this project is released with a [Code of Conduct](./CONDUCT.md). By contributing to this project, you agree to abide by its terms.

@@ -100,9 +100,10 @@ Your workflow may determine how you use NEMOSIS. Because the GUI relies on data

```python
from nemosis import dynamic_data_compiler
from datetime import datetime

start_time = '2017/01/01 00:00:00'
end_time = '2017/01/01 00:05:00'
start_time = datetime(2017, 1, 1, 0, 0)
end_time = datetime(2017, 1, 1, 0, 5)
table = 'DISPATCHPRICE'
raw_data_cache = 'C:/Users/your_data_storage'

@@ -113,6 +114,8 @@ Using the default settings of `dynamic_data_compiler` will download CSV data fro

A number of options are available to configure filtering (i.e. what data NEMOSIS returns as a pandas DataFrame) and caching.

For `start_time` and `end_time` you can pass a datetime (timezone unaware), a `date`, or a string of the form "YYYY/MM/DD HH:MM:SS", e.g. `2017/01/01 00:00:00`.
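
For instance, all of the following are accepted after this change (a sketch: the cache path is a placeholder, and the date-to-midnight mapping is inferred from the `midnight='start'`/`'end'` keyword used in `custom_tables.py` later in this PR):

```python
from datetime import datetime, date

from nemosis import dynamic_data_compiler

raw_data_cache = 'C:/Users/your_data_storage'

# 1. A string in "YYYY/MM/DD HH:MM:SS" form (the original interface).
prices = dynamic_data_compiler('2017/01/01 00:00:00', '2017/01/01 00:05:00',
                               'DISPATCHPRICE', raw_data_cache)

# 2. Timezone-unaware datetimes.
prices = dynamic_data_compiler(datetime(2017, 1, 1, 0, 0),
                               datetime(2017, 1, 1, 0, 5),
                               'DISPATCHPRICE', raw_data_cache)

# 3. Bare dates, assumed to snap to midnight at the start/end of the day.
prices = dynamic_data_compiler(date(2017, 1, 1), date(2017, 1, 2),
                               'DISPATCHPRICE', raw_data_cache)
```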

###### Filter options

`dynamic_data_compiler` can be used to filter data before returning results.
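
A sketch of that filtering (the column/value pairing mirrors the `filter_cols`/`filter_values` usage visible in `custom_tables.py` below; the region filter itself is illustrative):

```python
from nemosis import dynamic_data_compiler

# Keep only NSW1 rows of the dispatch price table (illustrative filter).
prices = dynamic_data_compiler(
    '2017/01/01 00:00:00', '2017/01/01 00:05:00',
    'DISPATCHPRICE', 'C:/Users/your_data_storage',
    filter_cols=['REGIONID'], filter_values=(['NSW1'],))
```
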
@@ -206,8 +209,8 @@ from nemosis import defaults

defaults.table_columns['BIDPEROFFER_D'] += ['PASAAVAILABILITY']

start_time = '2017/01/01 00:00:00'
end_time = '2017/01/01 00:05:00'
start_time = datetime(2017, 1, 1, 0, 0)
end_time = datetime(2017, 1, 1, 0, 5)
table = 'BIDPEROFFER_D'
raw_data_cache = 'C:/Users/your_data_storage'

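Assembled, the customised-columns pattern from the hunk above reads roughly as follows (a sketch: the compiler call sits in the collapsed lines, and the `datetime` import is assumed):

```python
from datetime import datetime

from nemosis import defaults, dynamic_data_compiler

# Register the extra column before compiling, as in the hunk above.
defaults.table_columns['BIDPEROFFER_D'] += ['PASAAVAILABILITY']

volume_bids = dynamic_data_compiler(
    datetime(2017, 1, 1, 0, 0), datetime(2017, 1, 1, 0, 5),
    'BIDPEROFFER_D', 'C:/Users/your_data_storage')
```
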
7 changes: 5 additions & 2 deletions pyproject.toml
@@ -14,17 +14,20 @@ dependencies = [
"xlrd>=2.0.1",
"beautifulsoup4>=4.12.3",
"openpyxl>=3.1.5",
"cachetools>=7",
]
readme = "README.md"
requires-python = ">= 3.9"
requires-python = ">= 3.10"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.uv]
managed = true
dev-dependencies = [

[dependency-groups]
dev = [
"parameterized>=0.9.0",
"pyinstaller>=6.11.1",
"pytest>=8.3.4",
2 changes: 1 addition & 1 deletion src/nemosis/__init__.py
@@ -1,6 +1,6 @@
import logging
import sys
from .value_parser import _parse_datetime, _parse_column, _infer_column_data_types
from .value_parser import _parse_column, _infer_column_data_types
from .data_fetch_methods import *

name = "osdan"
47 changes: 24 additions & 23 deletions src/nemosis/custom_tables.py
@@ -2,8 +2,10 @@
from datetime import timedelta, datetime
import math
import numpy as np
from nemosis import defaults, data_fetch_methods, filters

from nemosis import defaults
from nemosis.date_generators import parse_datetime_py
from nemosis.filters import filter_on_column_value
from nemosis.data_fetch_methods import dynamic_data_compiler, static_table

def fcas4s_scada_match(
start_time,
@@ -17,12 +19,12 @@

# Pull in the 4 second fcas data.
table_name_fcas4s = "FCAS_4_SECOND"
fcas4s = data_fetch_methods.dynamic_data_compiler(
fcas4s = dynamic_data_compiler(
start_time, end_time, table_name_fcas4s, raw_data_location
)
# Pull in the 4 second fcas variable types.
table_name_variable_types = "VARIABLES_FCAS_4_SECOND"
fcas4s_variable_types = data_fetch_methods.static_table(
fcas4s_variable_types = static_table(
table_name_variable_types, raw_data_location
)

@@ -52,7 +54,7 @@ def fcas4s_scada_match(

# Pull in the dispatch unit scada data.
table_name_scada = "DISPATCH_UNIT_SCADA"
scada = data_fetch_methods.dynamic_data_compiler(
scada = dynamic_data_compiler(
start_time, end_time, table_name_scada, raw_data_location
)
scada["SETTLEMENTDATE"] = scada["SETTLEMENTDATE"] - timedelta(minutes=5)
@@ -62,7 +64,7 @@ def fcas4s_scada_match(

# Pull in the interconnector scada data and use the intervention records where they exist.
table_name_inter_flow = "DISPATCHINTERCONNECTORRES"
inter_flows = data_fetch_methods.dynamic_data_compiler(
inter_flows = dynamic_data_compiler(
start_time, end_time, table_name_inter_flow, raw_data_location
)
inter_flows["METEREDMWFLOW"] = pd.to_numeric(inter_flows["METEREDMWFLOW"])
@@ -144,7 +146,7 @@ def fcas4s_scada_match(
best_matches_scada = best_matches_scada.loc[:, select_columns]

if filter_cols is not None:
best_matches_scada = filters.filter_on_column_value(
best_matches_scada = filter_on_column_value(
best_matches_scada, filter_cols, filter_values
)

@@ -232,8 +234,7 @@ def stats_for_group(capacity_and_scada_grouped):
peak_percentile = capacity_factor_over_90th_percentile_of_nodal_demand(
capacity_and_scada_grouped
)
month = list(capacity_and_scada_grouped["MONTH"])[0]
duid = list(capacity_and_scada_grouped["DUID"])[0]
month, duid = capacity_and_scada_grouped.name # the keys used for the grouping
cf_df = pd.DataFrame(
{
"Month": [month],
@@ -260,7 +261,7 @@ def stats_by_month_and_plant(capacity_and_scada):
)
capacity_factors = capacity_and_scada.groupby(
["MONTH", "DUID"], as_index=False
).apply(stats_for_group)
).apply(stats_for_group, include_groups=False)
return capacity_factors
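
As a toy illustration (not NEMOSIS data) of the pandas behaviour the hunk above relies on: with `include_groups=False`, the grouping columns are dropped from the frame each group receives, and the group keys are exposed via `.name`:

```python
import pandas as pd

df = pd.DataFrame({
    'MONTH': [1, 1, 2],
    'DUID': ['A', 'A', 'B'],
    'SCADAVALUE': [10.0, 20.0, 30.0],
})

def summarise(group):
    # With include_groups=False, MONTH and DUID are absent from `group`;
    # the grouping keys arrive as the tuple `group.name` instead.
    month, duid = group.name
    return pd.DataFrame({'Month': [month], 'DUID': [duid],
                         'Mean MW': [group['SCADAVALUE'].mean()]})

stats = df.groupby(['MONTH', 'DUID'], as_index=False).apply(
    summarise, include_groups=False)
```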


@@ -409,15 +410,15 @@ def plant_stats(
):

ix = pd.date_range(
start=datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S"),
end=datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S") - timedelta(minutes=5),
start=parse_datetime_py(start_time, midnight='start'),
end=parse_datetime_py(end_time, midnight='end') - timedelta(minutes=5),
freq="5T",
)
timeseries_df = pd.DataFrame(index=ix)
timeseries_df.reset_index(inplace=True)
timeseries_df.columns = ["SETTLEMENTDATE"]

gen_max_cap = data_fetch_methods.dynamic_data_compiler(
gen_max_cap = dynamic_data_compiler(
start_time,
end_time,
"DUDETAIL",
@@ -429,7 +430,7 @@ def plant_stats(
gen_max_cap = select_highest_version_number(
gen_max_cap, defaults.table_primary_keys["DUDETAIL"]
)
gen_region = data_fetch_methods.dynamic_data_compiler(
gen_region = dynamic_data_compiler(
start_time,
end_time,
"DUDETAILSUMMARY",
@@ -438,14 +439,14 @@ def plant_stats(
filter_cols=filter_cols,
filter_values=filter_values,
)
scada = data_fetch_methods.dynamic_data_compiler(
scada = dynamic_data_compiler(
start_time,
end_time,
"DISPATCH_UNIT_SCADA",
raw_data_location,
select_columns=["SETTLEMENTDATE", "DUID", "SCADAVALUE"],
)
dispatch_price = data_fetch_methods.dynamic_data_compiler(
dispatch_price = dynamic_data_compiler(
start_time,
end_time,
"DISPATCHPRICE",
@@ -455,7 +456,7 @@ def plant_stats(
dispatch_price = select_intervention_if_present(
dispatch_price, defaults.table_primary_keys["DISPATCHPRICE"]
)
trading_price = data_fetch_methods.dynamic_data_compiler(
trading_price = dynamic_data_compiler(
start_time,
end_time,
"TRADINGPRICE",
@@ -466,7 +467,7 @@ def plant_stats(
trading_price["RRP"] = pd.to_numeric(trading_price["RRP"])
# trading_price = calc_trading_price(dispatch_price)

region_summary = data_fetch_methods.dynamic_data_compiler(
region_summary = dynamic_data_compiler(
start_time,
end_time,
"DISPATCHREGIONSUM",
@@ -518,23 +519,23 @@ def plant_stats(


def trading_and_dispatch_cost():
gen_region = data_fetch_methods.dynamic_data_compiler(
gen_region = dynamic_data_compiler(
"2017/01/01 00:05:00",
"2018/01/01 00:05:00",
"DUDETAILSUMMARY",
defaults.raw_data_cache,
select_columns=["START_DATE", "END_DATE", "DUID", "REGIONID"],
)
scada = data_fetch_methods.dynamic_data_compiler(
scada = dynamic_data_compiler(
"2017/01/01 00:05:00",
"2018/01/01 00:05:00",
"DISPATCH_UNIT_SCADA",
defaults.raw_data_cache,
)

ix = pd.date_range(
start=datetime.strptime("2017/01/01 00:00:00", "%Y/%m/%d %H:%M:%S"),
end=datetime.strptime("2018/01/01 00:00:00", "%Y/%m/%d %H:%M:%S"),
start=datetime(2017, 1, 1),
end=datetime(2018, 1, 1),
freq="5T",
)
timeseries_df = pd.DataFrame(index=ix)
@@ -552,7 +553,7 @@ def trading_and_dispatch_cost():

scada = pd.concat(scada_list)

dispatch_price = data_fetch_methods.dynamic_data_compiler(
dispatch_price = dynamic_data_compiler(
"2017/01/01 00:00:00",
"2018/01/01 00:05:00",
"DISPATCHPRICE",
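
A closing note on `parse_datetime_py`: this PR imports it from `nemosis.date_generators`, but its body sits outside the visible hunks. Judging only from the call sites above (strings formerly parsed with `strptime`, plus the `midnight='start'`/`'end'` keyword), a compatible sketch might be:

```python
import datetime as dt

def parse_datetime_py(value, midnight='start'):
    """Hypothetical reconstruction, inferred from the call sites in this PR."""
    if isinstance(value, str):
        # The format strptime handled before this change.
        return dt.datetime.strptime(value, "%Y/%m/%d %H:%M:%S")
    if isinstance(value, dt.datetime):
        # Checked before dt.date, since datetime subclasses date.
        return value
    if isinstance(value, dt.date):
        # midnight='end' is assumed to mean the midnight that ends the day.
        if midnight == 'end':
            return dt.datetime.combine(value + dt.timedelta(days=1), dt.time())
        return dt.datetime.combine(value, dt.time())
    raise TypeError(f"Unsupported start/end time: {value!r}")
```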