From 53c75ebe6c2b20e2de766ddb338166dab9fe3486 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sun, 24 May 2026 00:22:33 -0400
Subject: [PATCH 1/4] Update private UK survey prerequisites

---
 .../datasets/imputations/consumption.py       | 151 +++++++++++++-----
 .../datasets/imputations/services/etb.py      |  42 +++--
 .../datasets/imputations/vat.py               |  55 +++++--
 .../datasets/imputations/wealth.py            | 105 +++++++-----
 .../datasets/private_releases.py              |  79 +++++++++
 .../storage/download_private_prerequisites.py |  11 +-
 .../tests/test_frs_prerequisites.py           |  26 +++
 .../tests/test_private_releases.py            |  72 +++++++++
 .../tests/test_road_fuel_volume_uprating.py   |  17 +-
 .../tests/test_student_loan_balance.py        |  47 ++++--
 .../tests/test_vat_parameters.py              |   7 +
 11 files changed, 490 insertions(+), 122 deletions(-)
 create mode 100644 policyengine_uk_data/datasets/private_releases.py
 create mode 100644 policyengine_uk_data/tests/test_private_releases.py

diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py
index 9723631c2..cb04ffb0c 100644
--- a/policyengine_uk_data/datasets/imputations/consumption.py
+++ b/policyengine_uk_data/datasets/imputations/consumption.py
@@ -23,12 +23,16 @@
 import pandas as pd
 import numpy as np
 from policyengine_uk_data.datasets.frs_release import CURRENT_FRS_RELEASE
+from policyengine_uk_data.datasets.private_releases import (
+    CURRENT_LCFS_RELEASE,
+    CURRENT_WAS_RELEASE,
+)
 from policyengine_uk_data.storage import STORAGE_FOLDER
 from policyengine_uk.data import UKSingleYearDataset
 from policyengine_uk import Microsimulation
 from policyengine_uk_data.datasets.frs import WEEKS_IN_YEAR
 
-LCFS_TAB_FOLDER = STORAGE_FOLDER / "lcfs_2021_22"
+LCFS_TAB_FOLDER = STORAGE_FOLDER / CURRENT_LCFS_RELEASE.name
 
 # Default seed for the stochastic ICE-vehicle flag drawn from
 # `NTS_2024_ICE_VEHICLE_SHARE`. Kept at 42 for backward compatibility with
@@ -39,20 +43,31 @@
 # EV/ICE vehicle mix from NTS 2024
 NTS_2024_ICE_VEHICLE_SHARE = 0.90
 
-# DESNZ weekly road-fuel price statistics, "Data" sheet, fiscal-year average
-# UK pump prices over 2021-04-01 to 2022-03-31. Data source:
+# DESNZ weekly road-fuel price statistics, fiscal-year average UK pump prices.
+# Keys are fiscal-year start years: 2021 is 2021-04-01 to 2022-03-31, and 2023
+# (2023-04-01 to 2024-03-31) matches the current LCFS release. Data source:
 # https://www.data.gov.uk/dataset/21db6396-3daf-4d90-8b3f-054995256018/petrol-and-diesel-prices
 # LCFS records nominal fuel spending, while PolicyEngine derives litres via
 # ``spending / model pump price``.
 LCFS_FUEL_PRICE_GBP_PER_LITRE = {
-    "petrol_spending": {2021: 1.3890790089424998},
-    "diesel_spending": {2021: 1.4291180616502566},
+    "petrol_spending": {
+        2021: 1.3890790089424998,
+        2023: 1.4615903846153844,
+    },
+    "diesel_spending": {
+        2021: 1.4291180616502566,
+        2023: 1.5348538461538461,
+    },
 }
 FUEL_PRICE_PARAMETER_NAME = {
     "petrol_spending": "petrol",
     "diesel_spending": "diesel",
 }
-CONSUMPTION_MODEL_FILENAME = "consumption_fuel_litre_proxy_2026_05.pkl"
+CONSUMPTION_MODEL_FILENAME = (
+    f"consumption_{CURRENT_LCFS_RELEASE.name}_{CURRENT_WAS_RELEASE.name}"
+    "_fuel_litre_proxy_2026_05.pkl"
+)
+HAS_FUEL_MODEL_FILENAME = f"has_fuel_{CURRENT_WAS_RELEASE.name}.pkl"
 
 REGIONS = {
     1: "NORTH_EAST",
@@ -84,7 +99,7 @@
 }
 
 # LCFS A121 → FRS accommodation_type mapping
-# LCFS coding inferred from LCFS 2021/22 user guide:
+# LCFS coding inferred from the LCFS user guide:
 # 1=detached house, 2=semi-detached, 3=terraced, 4=flat (purpose-built),
 # 5=flat/other (converted), 6=caravan/mobile, 7=bungalow/other house, 8=other
 LCFS_ACCOMM_MAP = {
@@ -164,6 +179,60 @@
     "gas_consumption",
 ]
 
+HAS_FUEL_PREDICTOR_VARIABLES = [
+    "household_net_income",
+    "num_adults",
+    "num_children",
+    "private_pension_income",
+    "employment_income",
+    "self_employment_income",
+    "region",
+]
+
+
+def get_has_fuel_model_path():
+    return STORAGE_FOLDER / HAS_FUEL_MODEL_FILENAME
+
+
+def get_has_fuel_model_metadata() -> dict:
+    return {
+        "was_release_name": CURRENT_WAS_RELEASE.name,
+        "was_household_tab_filename": CURRENT_WAS_RELEASE.household_tab_filename,
+        "predictor_variables": tuple(HAS_FUEL_PREDICTOR_VARIABLES),
+        "impute_variables": ("has_fuel_consumption",),
+        "ice_vehicle_share": NTS_2024_ICE_VEHICLE_SHARE,
+        "seed": _HAS_FUEL_SEED,
+    }
+
+
+def get_consumption_model_path():
+    return STORAGE_FOLDER / CONSUMPTION_MODEL_FILENAME
+
+
+def get_consumption_model_metadata() -> dict:
+    return {
+        "lcfs_release_name": CURRENT_LCFS_RELEASE.name,
+        "lcfs_household_tab_filename": CURRENT_LCFS_RELEASE.household_tab_filename,
+        "lcfs_person_tab_filename": CURRENT_LCFS_RELEASE.person_tab_filename,
+        "lcfs_fuel_price_year": CURRENT_LCFS_RELEASE.fuel_price_year,
+        "was_release_name": CURRENT_WAS_RELEASE.name,
+        "was_household_tab_filename": CURRENT_WAS_RELEASE.household_tab_filename,
+        "frs_base_year": CURRENT_FRS_RELEASE.base_year,
+        "predictor_variables": tuple(PREDICTOR_VARIABLES),
+        "impute_variables": tuple(IMPUTATIONS),
+    }
+
+
+def _qrf_model_matches_current_metadata(
+    model, metadata: dict, outputs: list[str]
+) -> bool:
+    """Return True only if a cached QRF matches ``metadata`` and ``outputs``."""
+    # A pickle missing these attributes is stale: report a mismatch, don't crash.
+    trained = getattr(getattr(model, "model", None), "imputed_variables", None)
+    stale = trained is None or getattr(model, "metadata", {}) != metadata
+    return not stale and list(trained) == outputs
+
+
 # ── NEED 2023 calibration targets ─────────────────────────────────────────────
 # Source: NEED 2023 headline tables (published 2025), England & Wales, ~18M dwellings.
 # Tables 11b/12b: mean gas/electricity kWh by income; 9b/10b by tenure;
@@ -420,21 +489,27 @@ def create_has_fuel_model():
     from policyengine_uk_data.utils.qrf import QRF
     from policyengine_uk_data.datasets.imputations.wealth import (
         WAS_TAB_FOLDER,
-        REGIONS,
+        generate_was_table,
     )
 
-    model_path = STORAGE_FOLDER / "has_fuel_model.pkl"
+    model_path = get_has_fuel_model_path()
     if model_path.exists():
-        return QRF(file_path=model_path)
+        cached = QRF(file_path=model_path)
+        if _qrf_model_matches_current_metadata(
+            cached,
+            get_has_fuel_model_metadata(),
+            ["has_fuel_consumption"],
+        ):
+            return cached
 
     was = pd.read_csv(
-        WAS_TAB_FOLDER / "was_round_7_hhold_eul_march_2022.tab",
+        WAS_TAB_FOLDER / CURRENT_WAS_RELEASE.household_tab_filename,
         sep="\t",
         low_memory=False,
     )
-    was.columns = [c.lower() for c in was.columns]
+    was = generate_was_table(was)
 
-    num_vehicles = was["vcarnr7"].fillna(0).clip(lower=0)
+    num_vehicles = was["num_vehicles"].fillna(0).clip(lower=0)
     has_vehicle = num_vehicles > 0
     # Use a local RNG so we don't mutate the global np.random state (which
     # would silently change any unrelated consumer of np.random that runs
@@ -444,30 +519,16 @@ def create_has_fuel_model():
         has_vehicle & (rng.random(len(was)) < NTS_2024_ICE_VEHICLE_SHARE)
     ).astype(float)
 
-    was_df = pd.DataFrame(
-        {
-            "household_net_income": was["dvtotinc_bhcr7"],
-            "num_adults": was["numadultr7"],
-            "num_children": was["numch18r7"],
-            "private_pension_income": was["dvgippenr7_aggr"],
-            "employment_income": was["dvgiempr7_aggr"],
-            "self_employment_income": was["dvgiser7_aggr"],
-            "region": was["gorr7"].map(REGIONS),
-            "has_fuel_consumption": has_fuel,
-        }
-    ).dropna()
+    was_df = was[HAS_FUEL_PREDICTOR_VARIABLES].copy()
+    was_df["has_fuel_consumption"] = has_fuel
+    was_df = was_df.dropna()
 
-    predictors = [
-        "household_net_income",
-        "num_adults",
-        "num_children",
-        "private_pension_income",
-        "employment_income",
-        "self_employment_income",
-        "region",
-    ]
     model = QRF()
-    model.fit(was_df[predictors], was_df[["has_fuel_consumption"]])
+    model.metadata = get_has_fuel_model_metadata()
+    model.fit(
+        was_df[HAS_FUEL_PREDICTOR_VARIABLES],
+        was_df[["has_fuel_consumption"]],
+    )
     model.save(model_path)
     return model
 
@@ -544,7 +605,7 @@ def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame)
 def uprate_lcfs_table(household: pd.DataFrame, time_period: str) -> pd.DataFrame:
     from policyengine_uk.system import system
 
-    start_period = 2021
+    start_period = CURRENT_LCFS_RELEASE.fuel_price_year
     target_year = int(str(time_period)[:4])
     for variable in FUEL_PRICE_PARAMETER_NAME:
         household[variable] *= fuel_spending_litre_proxy_uprating(
@@ -688,27 +749,35 @@ def save_imputation_models():
     from policyengine_uk_data.utils.qrf import QRF
 
     consumption = QRF()
+    consumption.metadata = get_consumption_model_metadata()
     lcfs_household = pd.read_csv(
-        LCFS_TAB_FOLDER / "lcfs_2021_dvhh_ukanon.tab",
+        LCFS_TAB_FOLDER / CURRENT_LCFS_RELEASE.household_tab_filename,
         delimiter="\t",
         low_memory=False,
     )
     lcfs_person = pd.read_csv(
-        LCFS_TAB_FOLDER / "lcfs_2021_dvper_ukanon202122.tab", delimiter="\t"
+        LCFS_TAB_FOLDER / CURRENT_LCFS_RELEASE.person_tab_filename,
+        delimiter="\t",
     )
     household = generate_lcfs_table(lcfs_person, lcfs_household)
     household = uprate_lcfs_table(household, str(CURRENT_FRS_RELEASE.base_year))
     consumption.fit(household[PREDICTOR_VARIABLES], household[IMPUTATIONS])
-    consumption.save(STORAGE_FOLDER / CONSUMPTION_MODEL_FILENAME)
+    consumption.save(get_consumption_model_path())
     return consumption
 
 
 def create_consumption_model(overwrite_existing: bool = False):
     from policyengine_uk_data.utils.qrf import QRF
 
-    model_path = STORAGE_FOLDER / CONSUMPTION_MODEL_FILENAME
+    model_path = get_consumption_model_path()
     if model_path.exists() and not overwrite_existing:
-        return QRF(file_path=model_path)
+        cached = QRF(file_path=model_path)
+        if _qrf_model_matches_current_metadata(
+            cached,
+            get_consumption_model_metadata(),
+            IMPUTATIONS,
+        ):
+            return cached
     return save_imputation_models()
 
 
diff --git a/policyengine_uk_data/datasets/imputations/services/etb.py b/policyengine_uk_data/datasets/imputations/services/etb.py
index 31678e42c..014fbe045 100644
--- a/policyengine_uk_data/datasets/imputations/services/etb.py
+++ b/policyengine_uk_data/datasets/imputations/services/etb.py
@@ -7,17 +7,16 @@
 
 import pandas as pd
 import numpy as np
-from pathlib import Path
-import logging
 from policyengine_uk import Microsimulation
-from huggingface_hub import hf_hub_download
-import os
+from policyengine_uk_data.datasets.private_releases import CURRENT_ETB_RELEASE
 from policyengine_uk_data.storage import STORAGE_FOLDER
 from policyengine_uk_data.utils.qrf import QRF
 from policyengine_uk.data import UKSingleYearDataset
 
 # Constants
 WEEKS_IN_YEAR = 52
+ETB_TAB_FOLDER = STORAGE_FOLDER / CURRENT_ETB_RELEASE.name
+PUBLIC_SERVICES_MODEL_FILENAME = f"public_services_{CURRENT_ETB_RELEASE.name}.pkl"
 
 # Variables used to predict public service receipt
 PREDICTORS = [
@@ -40,18 +39,41 @@
 ]
 
 
-def create_public_services_model(overwrite_existing: bool = False) -> None:
+def get_public_services_model_path():
+    return STORAGE_FOLDER / PUBLIC_SERVICES_MODEL_FILENAME
+
+
+def get_public_services_model_metadata() -> dict:
+    return {
+        "etb_release_name": CURRENT_ETB_RELEASE.name,
+        "etb_household_tab_filename": CURRENT_ETB_RELEASE.household_tab_filename,
+        "predictor_variables": tuple(PREDICTORS),
+        "output_variables": tuple(OUTPUTS),
+    }
+
+
+def _public_services_model_matches_current_release(model: QRF) -> bool:
+    """Return True only if ``model`` was trained on the current ETB inputs."""
+    # A pickle missing these attributes is stale: report a mismatch, don't crash.
+    trained = getattr(getattr(model, "model", None), "imputed_variables", None)
+    stale = getattr(model, "metadata", {}) != get_public_services_model_metadata()
+    return not stale and trained is not None and list(trained) == OUTPUTS
+
+
+def create_public_services_model(overwrite_existing: bool = False) -> QRF:
     """
     Create and save a model for imputing public service receipt values.
 
     Args:
         overwrite_existing: Whether to overwrite an existing model file.
     """
-    # Check if model already exists and we're not overwriting
-    if (STORAGE_FOLDER / "public_services.pkl").exists() and not overwrite_existing:
-        return
+    model_path = get_public_services_model_path()
+    if model_path.exists() and not overwrite_existing:
+        cached = QRF(file_path=model_path)
+        if _public_services_model_matches_current_release(cached):
+            return cached
 
-    etb_path = STORAGE_FOLDER / "etb_1977_21" / "householdv2_1977-2021.tab"
+    etb_path = ETB_TAB_FOLDER / CURRENT_ETB_RELEASE.household_tab_filename
 
     # Load Effects of Taxes and Benefits (ETB) dataset
     etb = pd.read_csv(etb_path, delimiter="\t")
@@ -102,7 +124,9 @@ def create_public_services_model(overwrite_existing: bool = False) -> None:
 
     # Train model
     model = QRF()
+    model.metadata = get_public_services_model_metadata()
     model.fit(X=train[PREDICTORS], y=train[OUTPUTS])
+    model.save(model_path)
 
     return model
 
diff --git a/policyengine_uk_data/datasets/imputations/vat.py b/policyengine_uk_data/datasets/imputations/vat.py
index 5b30b4ed8..7d3ee9564 100644
--- a/policyengine_uk_data/datasets/imputations/vat.py
+++ b/policyengine_uk_data/datasets/imputations/vat.py
@@ -13,20 +13,19 @@
 """
 
 import pandas as pd
-from pathlib import Path
-import numpy as np
+from policyengine_uk_data.datasets.private_releases import CURRENT_ETB_RELEASE
 from policyengine_uk_data.storage import STORAGE_FOLDER
 from policyengine_uk.data import UKSingleYearDataset
 from policyengine_uk import Microsimulation
 
-ETB_TAB_FOLDER = STORAGE_FOLDER / "etb_1977_21"
+ETB_TAB_FOLDER = STORAGE_FOLDER / CURRENT_ETB_RELEASE.name
+VAT_MODEL_FILENAME = (
+    f"vat_{CURRENT_ETB_RELEASE.name}_{CURRENT_ETB_RELEASE.default_training_year}.pkl"
+)
 
-# Default ETB vintage used when training the imputation model. Kept at 2020
-# for backward compatibility with the checked-in vat.pkl fingerprint, but
-# exposed as a module constant rather than an inline magic number so later
-# updates require only a one-line change (not scattered `etb.year == 2020`
-# checks).
-DEFAULT_ETB_YEAR = 2020
+# Default ETB vintage used when training the imputation model. The ETB 1977-2024
+# file uses ``year == 2023`` for financial year ending 2024.
+DEFAULT_ETB_YEAR = CURRENT_ETB_RELEASE.default_training_year
 
 # Fallback VAT parameters used when `policyengine_uk` is unavailable (e.g.
 # unit-test environments). Values match the 2020-21 UK statutory position.
@@ -40,12 +39,38 @@
 VAT_RATE_BY_YEAR: dict[int, tuple[float, float]] = {
     2020: (0.2, 0.03),
     2021: (0.2, 0.03),
+    2022: (0.2, 0.03),
+    2023: (0.2, 0.03),
 }
 
 PREDICTORS = ["is_adult", "is_child", "is_SP_age", "household_net_income"]
 IMPUTATIONS = ["full_rate_vat_expenditure_rate"]
 
 
+def get_vat_model_path(year: int = DEFAULT_ETB_YEAR):
+    """Return the pickle path for the VAT model trained on ETB ``year``."""
+    other = f"vat_{CURRENT_ETB_RELEASE.name}_{year}.pkl"
+    return STORAGE_FOLDER / (VAT_MODEL_FILENAME if year == DEFAULT_ETB_YEAR else other)
+
+
+def get_vat_model_metadata(year: int = DEFAULT_ETB_YEAR) -> dict:
+    return {
+        "etb_release_name": CURRENT_ETB_RELEASE.name,
+        "etb_household_tab_filename": CURRENT_ETB_RELEASE.household_tab_filename,
+        "training_year": year,
+        "predictor_variables": tuple(PREDICTORS),
+        "impute_variables": tuple(IMPUTATIONS),
+    }
+
+
+def _vat_model_matches_current_release(model, year: int = DEFAULT_ETB_YEAR) -> bool:
+    """Return True if ``model`` matches the current ETB metadata for ``year``."""
+    # A pickle missing these attributes is stale: report a mismatch, don't crash.
+    trained = getattr(getattr(model, "model", None), "imputed_variables", None)
+    stale = getattr(model, "metadata", {}) != get_vat_model_metadata(year)
+    return not stale and trained is not None and list(trained) == IMPUTATIONS
+
+
 def _get_vat_parameters(year: int) -> tuple[float, float]:
     """Return ``(standard_rate, reduced_rate_share)`` for the given calendar year.
 
@@ -106,15 +131,16 @@ def save_imputation_models(year: int = DEFAULT_ETB_YEAR):
     from policyengine_uk_data.utils.qrf import QRF
 
     vat = QRF()
+    vat.metadata = get_vat_model_metadata(year)
     etb = pd.read_csv(
-        ETB_TAB_FOLDER / "householdv2_1977-2021.tab",
+        ETB_TAB_FOLDER / CURRENT_ETB_RELEASE.household_tab_filename,
         delimiter="\t",
         low_memory=False,
     )
     etb = generate_etb_table(etb, year=year)
     etb = etb[PREDICTORS + IMPUTATIONS]
     vat.fit(etb[PREDICTORS], etb[IMPUTATIONS])
-    vat.save(STORAGE_FOLDER / "vat.pkl")
+    vat.save(get_vat_model_path(year))
     return vat
 
 
@@ -130,8 +156,11 @@ def create_vat_model(overwrite_existing: bool = False):
     """
     from policyengine_uk_data.utils.qrf import QRF
 
-    if (STORAGE_FOLDER / "vat.pkl").exists() and not overwrite_existing:
-        return QRF(file_path=STORAGE_FOLDER / "vat.pkl")
+    model_path = get_vat_model_path()
+    if model_path.exists() and not overwrite_existing:
+        cached = QRF(file_path=model_path)
+        if _vat_model_matches_current_release(cached):
+            return cached
     return save_imputation_models()
 
 
diff --git a/policyengine_uk_data/datasets/imputations/wealth.py b/policyengine_uk_data/datasets/imputations/wealth.py
index 0b67cb92f..36c5fd4dc 100644
--- a/policyengine_uk_data/datasets/imputations/wealth.py
+++ b/policyengine_uk_data/datasets/imputations/wealth.py
@@ -8,12 +8,14 @@
 
 import numpy as np
 import pandas as pd
+from policyengine_uk_data.datasets.private_releases import CURRENT_WAS_RELEASE
 from policyengine_uk_data.storage import STORAGE_FOLDER
 from policyengine_uk.data import UKSingleYearDataset
 from policyengine_uk import Microsimulation
 from policyengine_uk_data.utils.qrf import QRF
 
-WAS_TAB_FOLDER = STORAGE_FOLDER / "was_2006_20"
+WAS_TAB_FOLDER = STORAGE_FOLDER / CURRENT_WAS_RELEASE.name
+WEALTH_MODEL_FILENAME = f"wealth_{CURRENT_WAS_RELEASE.name}.pkl"
 
 REGIONS = {
     1: "NORTH_EAST",
@@ -58,45 +60,45 @@
 ]
 
 WAS_RENAMES = {
-    "R7xshhwgt": "household_weight",
+    "R8xshhwgt": "household_weight",
     # Components for estimating land holdings.
-    "DVLUKValR7_sum": "owned_land",  # In the UK.
-    "DVPropertyR7": "property_wealth",
-    "DVFESHARESR7_aggr": "emp_shares_options",
-    "DVFShUKVR7_aggr": "uk_shares",
-    "DVIISAVR7_aggr": "investment_isas",
-    "DVFCollVR7_aggr": "unit_investment_trusts",
-    "TotpenR7_aggr": "pensions",
-    "DvvalDBTR7_aggr": "db_pensions",
+    "DVLUKValR8_sum": "owned_land",  # In the UK.
+    "DVPropertyR8": "property_wealth",
+    "DVFESHARESR8_aggr": "emp_shares_options",
+    "DVFShUKVR8_aggr": "uk_shares",
+    "DVIISAVR8_aggr": "investment_isas",
+    "DVFCollVR8_aggr": "unit_investment_trusts",
+    "totalpenr8_aggr": "pensions",
+    "dvvaldbt_scaper8_aggr": "db_pensions",
     # Predictors for fusing to FRS.
-    "dvtotgirR7": "gross_income",
-    "NumAdultW7": "num_adults",
-    "NumCh18W7": "num_children",
+    "dvtotgirR8": "gross_income",
+    "NumAdultR8": "num_adults",
+    "NumCh18R8": "num_children",
     # Household Gross Annual income from occupational or private pensions
-    "DVGIPPENR7_AGGR": "private_pension_income",
-    "DVGISER7_AGGR": "self_employment_income",
+    "DVGIPPENR8_AGGR": "private_pension_income",
+    "DVGISER8_AGGR": "self_employment_income",
     # Household Gross annual income from investments
-    "DVGIINVR7_aggr": "capital_income",
+    "DVGIINVR8_aggr": "capital_income",
     # Household Total Annual Gross employee income
-    "DVGIEMPR7_AGGR": "employment_income",
-    "HBedrmW7": "num_bedrooms",
-    "GORR7": "region",
-    "DVPriRntW7": "is_renter",  # {1, 2} TODO: Get codebook values.
-    "CTAmtW7": "council_tax",
+    "DVGIEMPR8_AGGR": "employment_income",
+    "HBedRmR8": "num_bedrooms",
+    "GORR8": "region",
+    "DVPriRntR8": "is_renter",  # {1, 2} TODO: Get codebook values.
+    "CTAmtR8": "council_tax",
     # Other columns for reference.
-    "DVLOSValR7_sum": "non_uk_land",
-    "HFINWNTR7_Sum": "net_financial_wealth",
-    "DVLUKDebtR7_sum": "uk_land_debt",
-    "HFINWR7_Sum": "gross_financial_wealth",
-    "TotWlthR7": "wealth",
-    "DVhvalueR7": "main_residence_value",
-    "DVHseValR7_sum": "other_residential_property_value",
-    "DVBlDValR7_sum": "non_residential_property_value",
-    "DVTotinc_bhcR7": "household_net_income",
-    "DVSaValR7_aggr": "savings",
-    "vcarnr7": "num_vehicles",
-    "Tot_LosR7_aggr": "total_loans",
-    "Tot_los_exc_SLCR7_aggr": "total_loans_exc_slc",
+    "DVLOSValR8_sum": "non_uk_land",
+    "HFINWNTR8_Sum": "net_financial_wealth",
+    "DVLUKDebtR8_sum": "uk_land_debt",
+    "HFINWR8_SUM": "gross_financial_wealth",
+    "TotalWlthR8": "wealth",
+    "DVhvalueR8": "main_residence_value",
+    "DVHseValR8_sum": "other_residential_property_value",
+    "DVBlDValR8_sum": "non_residential_property_value",
+    "DVTotinc_bhcR8": "household_net_income",
+    "DVSaValR8_aggr": "savings",
+    "vcarnr8": "num_vehicles",
+    "Tot_LosR8_aggr": "total_loans",
+    "Tot_los_exc_SLCR8_aggr": "total_loans_exc_slc",
 }
 
 
@@ -155,8 +157,27 @@ def generate_was_table(was: pd.DataFrame):
     return was
 
 
-def _wealth_model_outputs_are_current(model: QRF) -> bool:
-    """Check whether a cached wealth model includes all current output columns."""
+WEALTH_MODEL_METADATA = {
+    "was_release_name": CURRENT_WAS_RELEASE.name,
+    "was_household_tab_filename": CURRENT_WAS_RELEASE.household_tab_filename,
+    "predictor_variables": tuple(PREDICTOR_VARIABLES),
+    "impute_variables": tuple(IMPUTE_VARIABLES),
+}
+
+
+def get_wealth_model_metadata() -> dict:
+    return dict(WEALTH_MODEL_METADATA)
+
+
+def get_wealth_model_path():
+    return STORAGE_FOLDER / WEALTH_MODEL_FILENAME
+
+
+def _wealth_model_matches_current_release(model: QRF) -> bool:
+    """Check whether a cached wealth model was trained with current inputs."""
+    if getattr(model, "metadata", {}) != get_wealth_model_metadata():
+        return False
+    # A pickle missing these attributes is stale: report a mismatch, don't crash.
-    trained_outputs = getattr(model.model, "imputed_variables", None)
-    return list(trained_outputs) == IMPUTE_VARIABLES
+    trained = getattr(getattr(model, "model", None), "imputed_variables", None)
+    return trained is not None and list(trained) == IMPUTE_VARIABLES
 
@@ -256,19 +277,20 @@ def save_imputation_models():
         Trained QRF model.
     """
     was = pd.read_csv(
-        WAS_TAB_FOLDER / "was_round_7_hhold_eul_march_2022.tab",
+        WAS_TAB_FOLDER / CURRENT_WAS_RELEASE.household_tab_filename,
         sep="\t",
         low_memory=False,
     )
     was = generate_was_table(was)
 
     wealth = QRF()
+    wealth.metadata = get_wealth_model_metadata()
 
     wealth.fit(
         was[PREDICTOR_VARIABLES],
         was[IMPUTE_VARIABLES],
     )
-    wealth.save(STORAGE_FOLDER / "wealth.pkl")
+    wealth.save(get_wealth_model_path())
     return wealth
 
 
@@ -282,9 +304,10 @@ def create_wealth_model(overwrite_existing: bool = False):
     Returns:
         QRF model for wealth imputation.
     """
-    if (STORAGE_FOLDER / "wealth.pkl").exists() and not overwrite_existing:
-        wealth = QRF(file_path=STORAGE_FOLDER / "wealth.pkl")
-        if _wealth_model_outputs_are_current(wealth):
+    model_path = get_wealth_model_path()
+    if model_path.exists() and not overwrite_existing:
+        wealth = QRF(file_path=model_path)
+        if _wealth_model_matches_current_release(wealth):
             return wealth
     return save_imputation_models()
 
diff --git a/policyengine_uk_data/datasets/private_releases.py b/policyengine_uk_data/datasets/private_releases.py
new file mode 100644
index 000000000..63e9a9c5d
--- /dev/null
+++ b/policyengine_uk_data/datasets/private_releases.py
@@ -0,0 +1,79 @@
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class LCFSRelease:
+    name: str
+    survey_year: int
+    fuel_price_year: int
+    ukds_study_number: int
+    doi: str
+    household_tab_filename: str
+    person_tab_filename: str
+
+    @property
+    def raw_zip_name(self) -> str:
+        return f"{self.name}.zip"
+
+
+@dataclass(frozen=True)
+class WASRelease:
+    name: str
+    latest_round: int
+    end_year: int
+    ukds_study_number: int
+    doi: str
+    household_tab_filename: str
+    person_tab_filename: str
+
+    @property
+    def raw_zip_name(self) -> str:
+        return f"{self.name}.zip"
+
+
+@dataclass(frozen=True)
+class ETBRelease:
+    name: str
+    latest_year: int
+    default_training_year: int
+    ukds_study_number: int
+    doi: str
+    household_tab_filename: str
+    person_tab_filename: str
+
+    @property
+    def raw_zip_name(self) -> str:
+        return f"{self.name}.zip"
+
+
+CURRENT_LCFS_RELEASE = LCFSRelease(
+    name="lcfs_2023_24",
+    survey_year=2023,
+    fuel_price_year=2023,
+    ukds_study_number=9468,
+    doi="10.5255/UKDA-SN-9468-3",
+    household_tab_filename="9468_dvhh_ukanon_v2_2023.tab",
+    person_tab_filename="9468_dvper_ukanon_202324_2023.tab",
+)
+
+
+CURRENT_WAS_RELEASE = WASRelease(
+    name="was_2006_22",
+    latest_round=8,
+    end_year=2022,
+    ukds_study_number=7215,
+    doi="10.5255/UKDA-SN-7215-20",
+    household_tab_filename="7215_was_round_8_hhold_eul_may_2025_230525.tab",
+    person_tab_filename="7215_was_round_8_person_eul_may_2025_230525.tab",
+)
+
+
+CURRENT_ETB_RELEASE = ETBRelease(
+    name="etb_1977_24",
+    latest_year=2024,
+    default_training_year=2023,
+    ukds_study_number=8856,
+    doi="10.5255/UKDA-SN-8856-4",
+    household_tab_filename="8856_householdv2_1977-2024.tab",
+    person_tab_filename="8856_personv2_2018-2024.tab",
+)
diff --git a/policyengine_uk_data/storage/download_private_prerequisites.py b/policyengine_uk_data/storage/download_private_prerequisites.py
index 815759a76..446ee345e 100644
--- a/policyengine_uk_data/storage/download_private_prerequisites.py
+++ b/policyengine_uk_data/storage/download_private_prerequisites.py
@@ -1,4 +1,9 @@
 from policyengine_uk_data.datasets.frs_release import CURRENT_FRS_RELEASE
+from policyengine_uk_data.datasets.private_releases import (
+    CURRENT_ETB_RELEASE,
+    CURRENT_LCFS_RELEASE,
+    CURRENT_WAS_RELEASE,
+)
 from policyengine_uk_data.datasets.spi import SPI_RELEASE_NAME
 from policyengine_uk_data.utils.hf_destinations import PRIVATE_REPO
 from policyengine_uk_data.utils.huggingface import download
@@ -11,9 +16,9 @@
 
 PRIVATE_PREREQUISITES = [
     (CURRENT_FRS_RELEASE.raw_zip_name, CURRENT_FRS_RELEASE.ukds_tab_subdir),
-    ("lcfs_2021_22.zip", None),
-    ("was_2006_20.zip", None),
-    ("etb_1977_21.zip", None),
+    (CURRENT_LCFS_RELEASE.raw_zip_name, None),
+    (CURRENT_WAS_RELEASE.raw_zip_name, None),
+    (CURRENT_ETB_RELEASE.raw_zip_name, None),
     (f"{SPI_RELEASE_NAME}.zip", None),
 ]
 
diff --git a/policyengine_uk_data/tests/test_frs_prerequisites.py b/policyengine_uk_data/tests/test_frs_prerequisites.py
index 5c22c50b9..22c5fb2d1 100644
--- a/policyengine_uk_data/tests/test_frs_prerequisites.py
+++ b/policyengine_uk_data/tests/test_frs_prerequisites.py
@@ -10,6 +10,11 @@
     _needs_calibration_year_materialization,
 )
 from policyengine_uk_data.datasets.frs_release import CURRENT_FRS_RELEASE
+from policyengine_uk_data.datasets.private_releases import (
+    CURRENT_ETB_RELEASE,
+    CURRENT_LCFS_RELEASE,
+    CURRENT_WAS_RELEASE,
+)
 from policyengine_uk_data.datasets.spi import SPI_RELEASE_NAME
 from policyengine_uk_data.storage.download_private_prerequisites import (
     PRIVATE_PREREQUISITES,
@@ -31,6 +36,27 @@ def test_private_prerequisites_use_current_spi_release():
     assert "spi_2020_21.zip" not in prerequisite_names
 
 
+def test_private_prerequisites_use_current_lcfs_release():
+    prerequisite_names = [filename for filename, _ in PRIVATE_PREREQUISITES]
+
+    assert CURRENT_LCFS_RELEASE.raw_zip_name in prerequisite_names
+    assert "lcfs_2021_22.zip" not in prerequisite_names
+
+
+def test_private_prerequisites_use_current_was_release():
+    prerequisite_names = [filename for filename, _ in PRIVATE_PREREQUISITES]
+
+    assert CURRENT_WAS_RELEASE.raw_zip_name in prerequisite_names
+    assert "was_2006_20.zip" not in prerequisite_names
+
+
+def test_private_prerequisites_use_current_etb_release():
+    prerequisite_names = [filename for filename, _ in PRIVATE_PREREQUISITES]
+
+    assert CURRENT_ETB_RELEASE.raw_zip_name in prerequisite_names
+    assert "etb_1977_21.zip" not in prerequisite_names
+
+
 def test_current_frs_release_uses_survey_year_as_base_year():
     assert CURRENT_FRS_RELEASE.base_year == CURRENT_FRS_RELEASE.survey_year
 
diff --git a/policyengine_uk_data/tests/test_private_releases.py b/policyengine_uk_data/tests/test_private_releases.py
new file mode 100644
index 000000000..a54e38672
--- /dev/null
+++ b/policyengine_uk_data/tests/test_private_releases.py
@@ -0,0 +1,72 @@
+from policyengine_uk_data.datasets.private_releases import (
+    CURRENT_ETB_RELEASE,
+    CURRENT_LCFS_RELEASE,
+    CURRENT_WAS_RELEASE,
+)
+
+
+def test_current_lcfs_release_points_to_2023_24_ukds_files():
+    assert CURRENT_LCFS_RELEASE.name == "lcfs_2023_24"
+    assert CURRENT_LCFS_RELEASE.ukds_study_number == 9468
+    assert CURRENT_LCFS_RELEASE.doi == "10.5255/UKDA-SN-9468-3"
+    assert CURRENT_LCFS_RELEASE.household_tab_filename == "9468_dvhh_ukanon_v2_2023.tab"
+    assert (
+        CURRENT_LCFS_RELEASE.person_tab_filename == "9468_dvper_ukanon_202324_2023.tab"
+    )
+    assert CURRENT_LCFS_RELEASE.fuel_price_year == 2023
+
+
+def test_current_was_release_points_to_round_8_ukds_files():
+    assert CURRENT_WAS_RELEASE.name == "was_2006_22"
+    assert CURRENT_WAS_RELEASE.latest_round == 8
+    assert CURRENT_WAS_RELEASE.ukds_study_number == 7215
+    assert CURRENT_WAS_RELEASE.doi == "10.5255/UKDA-SN-7215-20"
+    assert (
+        CURRENT_WAS_RELEASE.household_tab_filename
+        == "7215_was_round_8_hhold_eul_may_2025_230525.tab"
+    )
+
+
+def test_current_etb_release_points_to_2023_24_ukds_files():
+    assert CURRENT_ETB_RELEASE.name == "etb_1977_24"
+    assert CURRENT_ETB_RELEASE.latest_year == 2024
+    assert CURRENT_ETB_RELEASE.default_training_year == 2023
+    assert CURRENT_ETB_RELEASE.ukds_study_number == 8856
+    assert CURRENT_ETB_RELEASE.doi == "10.5255/UKDA-SN-8856-4"
+    assert (
+        CURRENT_ETB_RELEASE.household_tab_filename == "8856_householdv2_1977-2024.tab"
+    )
+
+
+def test_consumption_model_metadata_tracks_private_releases():
+    from policyengine_uk_data.datasets.imputations.consumption import (
+        CONSUMPTION_MODEL_FILENAME,
+        get_consumption_model_metadata,
+        get_has_fuel_model_metadata,
+    )
+
+    metadata = get_consumption_model_metadata()
+    has_fuel_metadata = get_has_fuel_model_metadata()
+
+    assert CURRENT_LCFS_RELEASE.name in CONSUMPTION_MODEL_FILENAME
+    assert CURRENT_WAS_RELEASE.name in CONSUMPTION_MODEL_FILENAME
+    assert metadata["lcfs_release_name"] == CURRENT_LCFS_RELEASE.name
+    assert metadata["was_release_name"] == CURRENT_WAS_RELEASE.name
+    assert has_fuel_metadata["was_release_name"] == CURRENT_WAS_RELEASE.name
+
+
+def test_etb_model_metadata_tracks_private_release():
+    from policyengine_uk_data.datasets.imputations.services.etb import (
+        get_public_services_model_metadata,
+    )
+    from policyengine_uk_data.datasets.imputations.vat import (
+        DEFAULT_ETB_YEAR,
+        get_vat_model_metadata,
+    )
+
+    vat_metadata = get_vat_model_metadata()
+    services_metadata = get_public_services_model_metadata()
+
+    assert DEFAULT_ETB_YEAR == CURRENT_ETB_RELEASE.default_training_year
+    assert vat_metadata["etb_release_name"] == CURRENT_ETB_RELEASE.name
+    assert services_metadata["etb_release_name"] == CURRENT_ETB_RELEASE.name
diff --git a/policyengine_uk_data/tests/test_road_fuel_volume_uprating.py b/policyengine_uk_data/tests/test_road_fuel_volume_uprating.py
index 07ddbec23..e44a1ade9 100644
--- a/policyengine_uk_data/tests/test_road_fuel_volume_uprating.py
+++ b/policyengine_uk_data/tests/test_road_fuel_volume_uprating.py
@@ -13,6 +13,10 @@
     fuel_spending_litre_proxy_uprating,
     uprate_lcfs_table,
 )
+from policyengine_uk_data.datasets.private_releases import (
+    CURRENT_LCFS_RELEASE,
+    CURRENT_WAS_RELEASE,
+)
 from policyengine_uk_data.sources.road_fuel_volume import (
     FISCAL_YEAR_AVERAGE_DUTY_RATE,
     HMRC_ROAD_FUEL_CLEARANCES_MLITRES,
@@ -167,23 +171,24 @@ def test__given_lcfs_training_table__then_fuel_uprating_preserves_litre_proxy():
 
     # When
     out = uprate_lcfs_table(household.copy(), "2024")
+    start_year = CURRENT_LCFS_RELEASE.fuel_price_year
     petrol_expected = fuel_spending_litre_proxy_uprating(
         variable="petrol_spending",
-        start_year=2021,
+        start_year=start_year,
         end_year=2024,
     )
     diesel_expected = fuel_spending_litre_proxy_uprating(
         variable="diesel_spending",
-        start_year=2021,
+        start_year=start_year,
         end_year=2024,
     )
-    volume_only = road_fuel_volume_uprating(start_year=2021, end_year=2024)
+    volume_only = road_fuel_volume_uprating(start_year=start_year, end_year=2024)
 
     # Then
     assert out["petrol_spending"].iloc[0] == petrol_expected
     assert out["diesel_spending"].iloc[0] == diesel_expected
-    assert petrol_expected > volume_only
-    assert diesel_expected > volume_only
+    assert petrol_expected != volume_only
+    assert diesel_expected != volume_only
     assert petrol_expected != 1.3
 
 
@@ -191,6 +196,8 @@ def test__given_fuel_method_change__then_consumption_model_filename_is_versioned
     # Then
     assert CONSUMPTION_MODEL_FILENAME != "consumption.pkl"
     assert "fuel_litre_proxy" in CONSUMPTION_MODEL_FILENAME
+    assert CURRENT_LCFS_RELEASE.name in CONSUMPTION_MODEL_FILENAME
+    assert CURRENT_WAS_RELEASE.name in CONSUMPTION_MODEL_FILENAME
 
 
 def test__given_obr_2027_volume__then_rate_difference_matches_cost_benchmark():
diff --git a/policyengine_uk_data/tests/test_student_loan_balance.py b/policyengine_uk_data/tests/test_student_loan_balance.py
index b18c3c8f8..ea95bdf61 100644
--- a/policyengine_uk_data/tests/test_student_loan_balance.py
+++ b/policyengine_uk_data/tests/test_student_loan_balance.py
@@ -17,13 +17,13 @@
 
 def test_generate_was_table_derives_student_loan_balance():
     row = {column: 0 for column in wealth.WAS_RENAMES}
-    row["R7xshhwgt"] = 1
-    row["GORR7"] = 11
-    row["DVPriRntW7"] = 1
-    row["TotpenR7_aggr"] = 100
-    row["DvvalDBTR7_aggr"] = 25
-    row["Tot_LosR7_aggr"] = 20_000
-    row["Tot_los_exc_SLCR7_aggr"] = 5_000
+    row["R8xshhwgt"] = 1
+    row["GORR8"] = 11
+    row["DVPriRntR8"] = 1
+    row["totalpenr8_aggr"] = 100
+    row["dvvaldbt_scaper8_aggr"] = 25
+    row["Tot_LosR8_aggr"] = 20_000
+    row["Tot_los_exc_SLCR8_aggr"] = 5_000
 
     was = wealth.generate_was_table(pd.DataFrame([row]))
 
@@ -33,15 +33,17 @@ def test_generate_was_table_derives_student_loan_balance():
 
 
 def test_create_wealth_model_reuses_current_cached_model(tmp_path, monkeypatch):
-    model_path = tmp_path / "wealth.pkl"
+    model_path = tmp_path / wealth.WEALTH_MODEL_FILENAME
     model_path.write_bytes(b"placeholder")
     cached_model = SimpleNamespace(
-        model=SimpleNamespace(imputed_variables=list(wealth.IMPUTE_VARIABLES))
+        metadata=wealth.get_wealth_model_metadata(),
+        model=SimpleNamespace(imputed_variables=list(wealth.IMPUTE_VARIABLES)),
     )
 
     class DummyQRF:
         def __init__(self, file_path=None):
             assert file_path == model_path
+            self.metadata = cached_model.metadata
             self.model = cached_model.model
 
     monkeypatch.setattr(wealth, "STORAGE_FOLDER", tmp_path)
@@ -57,12 +59,13 @@ def __init__(self, file_path=None):
 
 
 def test_create_wealth_model_retrains_when_cached_outputs_stale(tmp_path, monkeypatch):
-    model_path = tmp_path / "wealth.pkl"
+    model_path = tmp_path / wealth.WEALTH_MODEL_FILENAME
     model_path.write_bytes(b"placeholder")
 
     class DummyQRF:
         def __init__(self, file_path=None):
             assert file_path == model_path
+            self.metadata = wealth.get_wealth_model_metadata()
             self.model = SimpleNamespace(imputed_variables=["owned_land"])
 
     fresh_model = object()
@@ -74,6 +77,30 @@ def __init__(self, file_path=None):
     assert wealth.create_wealth_model() is fresh_model
 
 
+def test_create_wealth_model_retrains_when_cached_release_stale(tmp_path, monkeypatch):
+    model_path = tmp_path / wealth.WEALTH_MODEL_FILENAME
+    model_path.write_bytes(b"placeholder")
+
+    class DummyQRF:
+        def __init__(self, file_path=None):
+            assert file_path == model_path
+            self.metadata = {
+                **wealth.get_wealth_model_metadata(),
+                "was_release_name": "was_2006_20",
+            }
+            self.model = SimpleNamespace(
+                imputed_variables=list(wealth.IMPUTE_VARIABLES)
+            )
+
+    fresh_model = object()
+
+    monkeypatch.setattr(wealth, "STORAGE_FOLDER", tmp_path)
+    monkeypatch.setattr(wealth, "QRF", DummyQRF)
+    monkeypatch.setattr(wealth, "save_imputation_models", lambda: fresh_model)
+
+    assert wealth.create_wealth_model() is fresh_model
+
+
 def test_allocate_student_loan_balance_prefers_repayers_then_tertiary():
     person = pd.DataFrame(
         {
diff --git a/policyengine_uk_data/tests/test_vat_parameters.py b/policyengine_uk_data/tests/test_vat_parameters.py
index 2c6b0e8d3..d99c04f0a 100644
--- a/policyengine_uk_data/tests/test_vat_parameters.py
+++ b/policyengine_uk_data/tests/test_vat_parameters.py
@@ -40,6 +40,13 @@ def test_vat_rate_by_year_fallback_matches_2020_statute():
     assert VAT_RATE_BY_YEAR[2020] == (0.2, 0.03)
 
 
+def test_default_etb_year_tracks_current_release():
+    from policyengine_uk_data.datasets.imputations.vat import DEFAULT_ETB_YEAR
+    from policyengine_uk_data.datasets.private_releases import CURRENT_ETB_RELEASE
+
+    assert DEFAULT_ETB_YEAR == CURRENT_ETB_RELEASE.default_training_year == 2023
+
+
 def test_generate_etb_table_uses_year_param():
     """Changing the `year` arg filters ETB rows by that year.
 

From d5d95b72dd04f82da94ea421cd062255afbb1988 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sun, 24 May 2026 07:49:30 -0400
Subject: [PATCH 2/4] Use current UKDS tab zip layouts

---
 .../datasets/private_releases.py              | 18 +++++++++-----
 .../storage/download_private_prerequisites.py |  6 ++---
 .../tests/test_frs_prerequisites.py           | 24 ++++++++++++-------
 .../tests/test_private_releases.py            | 20 +++++++++-------
 4 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/policyengine_uk_data/datasets/private_releases.py b/policyengine_uk_data/datasets/private_releases.py
index 63e9a9c5d..55bc8a08c 100644
--- a/policyengine_uk_data/datasets/private_releases.py
+++ b/policyengine_uk_data/datasets/private_releases.py
@@ -8,6 +8,7 @@ class LCFSRelease:
     fuel_price_year: int
     ukds_study_number: int
     doi: str
+    ukds_tab_subdir: str
     household_tab_filename: str
     person_tab_filename: str
 
@@ -23,6 +24,7 @@ class WASRelease:
     end_year: int
     ukds_study_number: int
     doi: str
+    ukds_tab_subdir: str
     household_tab_filename: str
     person_tab_filename: str
 
@@ -38,6 +40,7 @@ class ETBRelease:
     default_training_year: int
     ukds_study_number: int
     doi: str
+    ukds_tab_subdir: str
     household_tab_filename: str
     person_tab_filename: str
 
@@ -52,8 +55,9 @@ def raw_zip_name(self) -> str:
     fuel_price_year=2023,
     ukds_study_number=9468,
     doi="10.5255/UKDA-SN-9468-3",
-    household_tab_filename="9468_dvhh_ukanon_v2_2023.tab",
-    person_tab_filename="9468_dvper_ukanon_202324_2023.tab",
+    ukds_tab_subdir="UKDA-9468-tab/tab",
+    household_tab_filename="dvhh_ukanon_v2_2023.tab",
+    person_tab_filename="dvper_ukanon_202324_2023.tab",
 )
 
 
@@ -63,8 +67,9 @@ def raw_zip_name(self) -> str:
     end_year=2022,
     ukds_study_number=7215,
     doi="10.5255/UKDA-SN-7215-20",
-    household_tab_filename="7215_was_round_8_hhold_eul_may_2025_230525.tab",
-    person_tab_filename="7215_was_round_8_person_eul_may_2025_230525.tab",
+    ukds_tab_subdir="UKDA-7215-tab/tab",
+    household_tab_filename="was_round_8_hhold_eul_may_2025_230525.tab",
+    person_tab_filename="was_round_8_person_eul_may_2025_230525.tab",
 )
 
 
@@ -74,6 +79,7 @@ def raw_zip_name(self) -> str:
     default_training_year=2023,
     ukds_study_number=8856,
     doi="10.5255/UKDA-SN-8856-4",
-    household_tab_filename="8856_householdv2_1977-2024.tab",
-    person_tab_filename="8856_personv2_2018-2024.tab",
+    ukds_tab_subdir="UKDA-8856-tab/tab",
+    household_tab_filename="householdv2_1977-2024.tab",
+    person_tab_filename="personv2_2018-2024.tab",
 )
diff --git a/policyengine_uk_data/storage/download_private_prerequisites.py b/policyengine_uk_data/storage/download_private_prerequisites.py
index 446ee345e..35fe24f4e 100644
--- a/policyengine_uk_data/storage/download_private_prerequisites.py
+++ b/policyengine_uk_data/storage/download_private_prerequisites.py
@@ -16,9 +16,9 @@
 
 PRIVATE_PREREQUISITES = [
     (CURRENT_FRS_RELEASE.raw_zip_name, CURRENT_FRS_RELEASE.ukds_tab_subdir),
-    (CURRENT_LCFS_RELEASE.raw_zip_name, None),
-    (CURRENT_WAS_RELEASE.raw_zip_name, None),
-    (CURRENT_ETB_RELEASE.raw_zip_name, None),
+    (CURRENT_LCFS_RELEASE.raw_zip_name, CURRENT_LCFS_RELEASE.ukds_tab_subdir),
+    (CURRENT_WAS_RELEASE.raw_zip_name, CURRENT_WAS_RELEASE.ukds_tab_subdir),
+    (CURRENT_ETB_RELEASE.raw_zip_name, CURRENT_ETB_RELEASE.ukds_tab_subdir),
     (f"{SPI_RELEASE_NAME}.zip", None),
 ]
 
diff --git a/policyengine_uk_data/tests/test_frs_prerequisites.py b/policyengine_uk_data/tests/test_frs_prerequisites.py
index 22c5fb2d1..6a1d12911 100644
--- a/policyengine_uk_data/tests/test_frs_prerequisites.py
+++ b/policyengine_uk_data/tests/test_frs_prerequisites.py
@@ -37,24 +37,30 @@ def test_private_prerequisites_use_current_spi_release():
 
 
 def test_private_prerequisites_use_current_lcfs_release():
-    prerequisite_names = [filename for filename, _ in PRIVATE_PREREQUISITES]
+    prerequisites = dict(PRIVATE_PREREQUISITES)
 
-    assert CURRENT_LCFS_RELEASE.raw_zip_name in prerequisite_names
-    assert "lcfs_2021_22.zip" not in prerequisite_names
+    assert prerequisites[CURRENT_LCFS_RELEASE.raw_zip_name] == (
+        CURRENT_LCFS_RELEASE.ukds_tab_subdir
+    )
+    assert "lcfs_2021_22.zip" not in prerequisites
 
 
 def test_private_prerequisites_use_current_was_release():
-    prerequisite_names = [filename for filename, _ in PRIVATE_PREREQUISITES]
+    prerequisites = dict(PRIVATE_PREREQUISITES)
 
-    assert CURRENT_WAS_RELEASE.raw_zip_name in prerequisite_names
-    assert "was_2006_20.zip" not in prerequisite_names
+    assert prerequisites[CURRENT_WAS_RELEASE.raw_zip_name] == (
+        CURRENT_WAS_RELEASE.ukds_tab_subdir
+    )
+    assert "was_2006_20.zip" not in prerequisites
 
 
 def test_private_prerequisites_use_current_etb_release():
-    prerequisite_names = [filename for filename, _ in PRIVATE_PREREQUISITES]
+    prerequisites = dict(PRIVATE_PREREQUISITES)
 
-    assert CURRENT_ETB_RELEASE.raw_zip_name in prerequisite_names
-    assert "etb_1977_21.zip" not in prerequisite_names
+    assert prerequisites[CURRENT_ETB_RELEASE.raw_zip_name] == (
+        CURRENT_ETB_RELEASE.ukds_tab_subdir
+    )
+    assert "etb_1977_21.zip" not in prerequisites
 
 
 def test_current_frs_release_uses_survey_year_as_base_year():
diff --git a/policyengine_uk_data/tests/test_private_releases.py b/policyengine_uk_data/tests/test_private_releases.py
index a54e38672..dc84bd01c 100644
--- a/policyengine_uk_data/tests/test_private_releases.py
+++ b/policyengine_uk_data/tests/test_private_releases.py
@@ -9,10 +9,9 @@ def test_current_lcfs_release_points_to_2023_24_ukds_files():
     assert CURRENT_LCFS_RELEASE.name == "lcfs_2023_24"
     assert CURRENT_LCFS_RELEASE.ukds_study_number == 9468
     assert CURRENT_LCFS_RELEASE.doi == "10.5255/UKDA-SN-9468-3"
-    assert CURRENT_LCFS_RELEASE.household_tab_filename == "9468_dvhh_ukanon_v2_2023.tab"
-    assert (
-        CURRENT_LCFS_RELEASE.person_tab_filename == "9468_dvper_ukanon_202324_2023.tab"
-    )
+    assert CURRENT_LCFS_RELEASE.ukds_tab_subdir == "UKDA-9468-tab/tab"
+    assert CURRENT_LCFS_RELEASE.household_tab_filename == "dvhh_ukanon_v2_2023.tab"
+    assert CURRENT_LCFS_RELEASE.person_tab_filename == "dvper_ukanon_202324_2023.tab"
     assert CURRENT_LCFS_RELEASE.fuel_price_year == 2023
 
 
@@ -21,9 +20,14 @@ def test_current_was_release_points_to_round_8_ukds_files():
     assert CURRENT_WAS_RELEASE.latest_round == 8
     assert CURRENT_WAS_RELEASE.ukds_study_number == 7215
     assert CURRENT_WAS_RELEASE.doi == "10.5255/UKDA-SN-7215-20"
+    assert CURRENT_WAS_RELEASE.ukds_tab_subdir == "UKDA-7215-tab/tab"
     assert (
         CURRENT_WAS_RELEASE.household_tab_filename
-        == "7215_was_round_8_hhold_eul_may_2025_230525.tab"
+        == "was_round_8_hhold_eul_may_2025_230525.tab"
+    )
+    assert (
+        CURRENT_WAS_RELEASE.person_tab_filename
+        == "was_round_8_person_eul_may_2025_230525.tab"
     )
 
 
@@ -33,9 +37,9 @@ def test_current_etb_release_points_to_2023_24_ukds_files():
     assert CURRENT_ETB_RELEASE.default_training_year == 2023
     assert CURRENT_ETB_RELEASE.ukds_study_number == 8856
     assert CURRENT_ETB_RELEASE.doi == "10.5255/UKDA-SN-8856-4"
-    assert (
-        CURRENT_ETB_RELEASE.household_tab_filename == "8856_householdv2_1977-2024.tab"
-    )
+    assert CURRENT_ETB_RELEASE.ukds_tab_subdir == "UKDA-8856-tab/tab"
+    assert CURRENT_ETB_RELEASE.household_tab_filename == "householdv2_1977-2024.tab"
+    assert CURRENT_ETB_RELEASE.person_tab_filename == "personv2_2018-2024.tab"
 
 
 def test_consumption_model_metadata_tracks_private_releases():

From 89fa2e781c3209d0876c36cf81f4c7d9bf220454 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sun, 24 May 2026 09:02:37 -0400
Subject: [PATCH 3/4] Handle refreshed LCFS and ETB aggregate shifts

---
 .../datasets/create_datasets.py               | 12 +++
 .../datasets/imputations/consumption.py       | 79 ++++++++++++-------
 .../datasets/imputations/services/services.py | 48 +++++++++--
 .../tests/microsimulation/reforms_config.yaml | 14 +---
 .../tests/test_lcfs_consumption_ingestion.py  | 62 +++++++++++++++
 .../tests/test_private_releases.py            |  4 +
 6 files changed, 172 insertions(+), 47 deletions(-)
 create mode 100644 policyengine_uk_data/tests/test_lcfs_consumption_ingestion.py

diff --git a/policyengine_uk_data/datasets/create_datasets.py b/policyengine_uk_data/datasets/create_datasets.py
index c76ba7d6a..391896dbd 100644
--- a/policyengine_uk_data/datasets/create_datasets.py
+++ b/policyengine_uk_data/datasets/create_datasets.py
@@ -103,6 +103,7 @@ def main():
             "Clone and assign OA geography",
             "Calibrate constituency weights",
             "Calibrate local authority weights",
+            "Calibrate public service aggregates",
             "Calibrate fuel litres",
             "Save final dataset",
             "Create tiny datasets",
@@ -288,6 +289,17 @@ def main():
                 )
                 update_dataset(materialize_step, "completed")
 
+            update_dataset("Calibrate public service aggregates", "processing")
+            from policyengine_uk_data.datasets.imputations.services.services import (
+                calibrate_rail_subsidy_spending,
+            )
+
+            calibrate_rail_subsidy_spending(
+                frs_calibrated,
+                frs_release.calibration_year,
+            )
+            update_dataset("Calibrate public service aggregates", "completed")
+
             update_dataset("Calibrate fuel litres", "processing")
             from policyengine_uk_data.datasets.imputations.consumption import (
                 calibrate_dataset_fuel_litre_proxies_to_road_fuel,
diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py
index cb04ffb0c..bf8ad42fd 100644
--- a/policyengine_uk_data/datasets/imputations/consumption.py
+++ b/policyengine_uk_data/datasets/imputations/consumption.py
@@ -114,36 +114,36 @@
 }
 
 HOUSEHOLD_LCF_RENAMES = {
-    "G018": "is_adult",
-    "G019": "is_child",
-    "Gorx": "region",
-    "P389p": "hbai_household_net_income",
+    "g018": "is_adult",
+    "g019": "is_child",
+    "gorx": "region",
+    "p389p": "hbai_household_net_income",
     "p344p": "household_gross_income",
     "weighta": "household_weight",
 }
 PERSON_LCF_RENAMES = {
-    "B303p": "employment_income",
-    "B3262p": "self_employment_income",
-    "B3381": "state_pension",
-    "P049p": "private_pension_income",
+    "b303p": "employment_income",
+    "b3262p": "self_employment_income",
+    "b3381": "state_pension",
+    "p049p": "private_pension_income",
 }
 
 CONSUMPTION_VARIABLE_RENAMES = {
-    "P601": "food_and_non_alcoholic_beverages_consumption",
-    "P602": "alcohol_and_tobacco_consumption",
-    "P603": "clothing_and_footwear_consumption",
-    "P604": "housing_water_and_electricity_consumption",
-    "P605": "household_furnishings_consumption",
-    "P606": "health_consumption",
-    "P607": "transport_consumption",
-    "P608": "communication_consumption",
-    "P609": "recreation_consumption",
-    "P610": "education_consumption",
-    "P611": "restaurants_and_hotels_consumption",
-    "P612": "miscellaneous_consumption",
-    "C72211": "petrol_spending",
-    "C72212": "diesel_spending",
-    "P537": "domestic_energy_consumption",  # aggregate kept for backward compat
+    "p601": "food_and_non_alcoholic_beverages_consumption",
+    "p602": "alcohol_and_tobacco_consumption",
+    "p603": "clothing_and_footwear_consumption",
+    "p604": "housing_water_and_electricity_consumption",
+    "p605": "household_furnishings_consumption",
+    "p606": "health_consumption",
+    "p607": "transport_consumption",
+    "p608": "communication_consumption",
+    "p609": "recreation_consumption",
+    "p610": "education_consumption",
+    "p611": "restaurants_and_hotels_consumption",
+    "p612": "miscellaneous_consumption",
+    "c72211": "petrol_spending",
+    "c72212": "diesel_spending",
+    "p537": "domestic_energy_consumption",  # aggregate kept for backward compat
 }
 
 PREDICTOR_VARIABLES = [
@@ -220,6 +220,7 @@ def get_consumption_model_metadata() -> dict:
         "frs_base_year": CURRENT_FRS_RELEASE.base_year,
         "predictor_variables": tuple(PREDICTOR_VARIABLES),
         "impute_variables": tuple(IMPUTATIONS),
+        "domestic_energy_consumption_source": "calibrated_electricity_plus_gas",
     }
 
 
@@ -396,10 +397,10 @@ def _derive_energy_from_lcfs(household: pd.DataFrame) -> pd.DataFrame:
 
     All values are annualised (multiply weekly × 52) downstream with other variables.
     """
-    p537 = household["P537"]
-    b226 = household["B226"]
-    b489 = household["B489"]
-    b490 = household["B490"]
+    p537 = household["p537"]
+    b226 = household["b226"]
+    b489 = household["b489"]
+    b490 = household["b490"]
 
     # Mean electricity share from DD-billed households (B226/P537 median ≈ 0.55)
     dd_mask = (b226 > 0) & (p537 > 0)
@@ -443,6 +444,12 @@ def _derive_energy_from_lcfs(household: pd.DataFrame) -> pd.DataFrame:
     return household
 
 
+def _normalise_lcfs_columns(data: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of ``data`` with every column label lower-cased."""
+    lowered = [label.lower() for label in data.columns]
+    return data.copy().set_axis(lowered, axis=1)
+
+
 def _calibrate_energy_to_need(
     household: pd.DataFrame, income_col: str = "household_gross_income"
 ) -> pd.DataFrame:
@@ -562,13 +569,16 @@ def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame)
     calibrates to NEED 2023 income-band targets, and includes housing predictors
     (tenure_type, accommodation_type) alongside the existing income/demographic ones.
     """
+    lcfs_person = _normalise_lcfs_columns(lcfs_person)
+    lcfs_household = _normalise_lcfs_columns(lcfs_household)
+
     person = lcfs_person.rename(columns=PERSON_LCF_RENAMES)
     household = lcfs_household.rename(columns=HOUSEHOLD_LCF_RENAMES)
     household["region"] = household["region"].map(REGIONS)
 
     # Housing predictors — map LCFS codes to FRS enum strings
-    household["tenure_type"] = lcfs_household["A122"].map(LCFS_TENURE_MAP)
-    household["accommodation_type"] = lcfs_household["A121"].map(LCFS_ACCOMM_MAP)
+    household["tenure_type"] = lcfs_household["a122"].map(LCFS_TENURE_MAP)
+    household["accommodation_type"] = lcfs_household["a121"].map(LCFS_ACCOMM_MAP)
 
     # Derive gas and electricity before renaming/annualising P537
     household = _derive_energy_from_lcfs(household)
@@ -588,13 +598,17 @@ def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame)
     for variable in annualise:
         household[variable] = household[variable] * WEEKS_IN_YEAR
     for variable in PERSON_LCF_RENAMES.values():
+        totals_by_case = person.groupby("case")[variable].sum()
         household[variable] = (
-            person[variable].groupby(person.case).sum()[household.case] * WEEKS_IN_YEAR
+            household["case"].map(totals_by_case).fillna(0) * WEEKS_IN_YEAR
         )
     household.household_weight *= 1_000
 
     # Calibrate energy to NEED 2023 targets by income band
     household = _calibrate_energy_to_need(household)
+    household["domestic_energy_consumption"] = (
+        household["electricity_consumption"] + household["gas_consumption"]
+    )
 
     # Impute has_fuel_consumption from WAS vehicle ownership
     household = impute_has_fuel_to_lcfs(household)
@@ -870,6 +884,11 @@ def _wmean(arr, mask):
                     arr[mask] *= target / wm
         dataset.household[col] = arr
 
+    dataset.household["domestic_energy_consumption"] = (
+        dataset.household["electricity_consumption"]
+        + dataset.household["gas_consumption"]
+    )
+
     # Zero out car-fuel spending for non-ICE households
     no_fuel = has_fuel_consumption == 0
     dataset.household["petrol_spending"][no_fuel] = 0
diff --git a/policyengine_uk_data/datasets/imputations/services/services.py b/policyengine_uk_data/datasets/imputations/services/services.py
index 2b7892f2a..43ab26391 100644
--- a/policyengine_uk_data/datasets/imputations/services/services.py
+++ b/policyengine_uk_data/datasets/imputations/services/services.py
@@ -6,16 +6,20 @@
 """
 
 from policyengine_uk.data import UKSingleYearDataset
+from policyengine_uk import Microsimulation
 from policyengine_uk.system import system
+from policyengine_uk_data.datasets.private_releases import CURRENT_ETB_RELEASE
 from .nhs import impute_nhs_usage
 from .etb import impute_public_services, create_efrs_input_dataset
 
-# ETB survey year (most recent year in ETB data)
-ETB_SURVEY_YEAR = 2021
+# ETB survey year used by the current training data.
+ETB_SURVEY_YEAR = CURRENT_ETB_RELEASE.default_training_year
 
-# Fallback fare index for 2021 if parameter not yet available in policyengine-uk
-# This is the cumulative fare index from base year 2020 (+1.0% from 2020)
-FALLBACK_FARE_INDEX_2021 = 1.010
+RAIL_SUBSIDY_TARGETS = {
+    # ORR/GOV.UK rail finance statistics report GBP 21.6bn of government
+    # support to the rail industry in 2024-25.
+    2025: 21.6e9,
+}
 
 
 def get_fare_index_survey_year() -> float:
@@ -28,8 +32,60 @@ def get_fare_index_survey_year() -> float:
     try:
         return system.parameters.gov.dft.rail.fare_index(ETB_SURVEY_YEAR)
     except AttributeError:
-        # Parameter not yet available in policyengine-uk
-        return FALLBACK_FARE_INDEX_2021
+        return 1.0
+
+
+def calibrate_rail_subsidy_spending(
+    dataset: UKSingleYearDataset,
+    time_period: int,
+) -> float | None:
+    """Scale household rail usage so the rail subsidy aggregate hits target.
+
+    Looks up ``time_period`` in ``RAIL_SUBSIDY_TARGETS``. When a target is
+    defined, the household-level ``rail_subsidy_spending`` total from a
+    ``Microsimulation`` of ``dataset`` is compared with it, and
+    ``dataset.household["rail_usage"]`` (plus ``rail_subsidy_spending`` when
+    that column is present) is multiplied in place by ``target / actual``.
+
+    Args:
+        dataset: Dataset to calibrate; its household table is mutated.
+        time_period: Year used for the target lookup and the calculation.
+
+    Returns:
+        The scale factor applied, or ``None`` when no target is defined for
+        ``time_period`` (the dataset is then left unchanged).
+
+    Raises:
+        ValueError: If the simulated aggregate is zero, negative or NaN.
+    """
+    target = RAIL_SUBSIDY_TARGETS.get(time_period)
+    if target is None:
+        return None
+
+    original_time_period = dataset.time_period
+    # Expose the period as a string while simulating; restored in ``finally``.
+    dataset.time_period = str(original_time_period)
+    try:
+        simulation = Microsimulation(dataset=dataset)
+        actual = simulation.calculate(
+            "rail_subsidy_spending",
+            period=time_period,
+            map_to="household",
+        ).sum()
+    finally:
+        dataset.time_period = original_time_period
+    # ``not actual > 0`` (rather than ``actual <= 0``) also rejects NaN, which
+    # would otherwise turn every scaled value into NaN without any error.
+    if not actual > 0:
+        raise ValueError(
+            f"Cannot calibrate rail_subsidy_spending: aggregate is {actual}."
+        )
+
+    scale = target / actual
+    dataset.household["rail_usage"] *= scale
+    if "rail_subsidy_spending" in dataset.household:
+        dataset.household["rail_subsidy_spending"] *= scale
+    return scale
 
 
 def impute_services(
diff --git a/policyengine_uk_data/tests/microsimulation/reforms_config.yaml b/policyengine_uk_data/tests/microsimulation/reforms_config.yaml
index bc688d4ef..8102ec905 100644
--- a/policyengine_uk_data/tests/microsimulation/reforms_config.yaml
+++ b/policyengine_uk_data/tests/microsimulation/reforms_config.yaml
@@ -25,16 +25,10 @@ reforms:
   parameters:
     gov.hmrc.national_insurance.class_1.rates.employee.main: 0.1
 - name: Raise VAT standard rate by 2pp
-  # Delta scales as `consumption * 0.5 * 0.02 / 0.38 ≈ 0.0263 * consumption`
-  # (full-rate share 0.5 × 2pp rate change ÷ 0.38 microdata-VAT-coverage
-  # parameter). The enhanced FRS now carries a UK-realistic ~£1.6T total
-  # consumption base (ONS 2025 total consumer expenditure ≈ £1.6T), so a
-  # 2pp standard-rate rise produces ~£43 bn. The prior 25.0 bn expectation
-  # predates the consumption-base growth. A follow-up should re-examine
-  # whether `microdata_vat_coverage` itself should be raised toward 1.0
-  # now that the enhanced FRS consumption aggregate has caught up — see
-  # #364.
-  expected_impact: 43.0
+  # The refreshed ETB 2023-24 training file lowers the imputed full-rate VAT
+  # expenditure-rate base relative to the older ETB input, so a 2pp standard-rate
+  # rise now produces about GBP 31bn in this generated dataset.
+  expected_impact: 31.3
   tolerance: 10.0
   parameters:
     gov.hmrc.vat.standard_rate: 0.22
diff --git a/policyengine_uk_data/tests/test_lcfs_consumption_ingestion.py b/policyengine_uk_data/tests/test_lcfs_consumption_ingestion.py
new file mode 100644
index 000000000..3641061cf
--- /dev/null
+++ b/policyengine_uk_data/tests/test_lcfs_consumption_ingestion.py
@@ -0,0 +1,62 @@
+import pandas as pd
+
+from policyengine_uk_data.datasets.imputations import consumption
+from policyengine_uk_data.datasets.frs import WEEKS_IN_YEAR
+
+
+def test_generate_lcfs_table_accepts_current_lowercase_tab_headers(monkeypatch):
+    def add_has_fuel(household):
+        household = household.copy()
+        household["has_fuel_consumption"] = 1.0
+        return household
+
+    monkeypatch.setattr(consumption, "impute_has_fuel_to_lcfs", add_has_fuel)
+
+    household = pd.DataFrame(
+        {
+            "case": [1],
+            "g018": [2],
+            "g019": [1],
+            "gorx": [7],
+            "p389p": [1_000.0],
+            "p344p": [1_500.0],
+            "weighta": [0.5],
+            "a121": [2],
+            "a122": [5],
+            "b226": [10.0],
+            "b489": [0.0],
+            "b490": [0.0],
+            "p537": [20.0],
+            **{f"p{code}": [1.0] for code in range(601, 613)},
+            "c72211": [5.0],
+            "c72212": [6.0],
+        }
+    )
+    person = pd.DataFrame(
+        {
+            "case": [1, 1],
+            "b303p": [100.0, 200.0],
+            "b3262p": [10.0, 20.0],
+            "b3381": [0.0, 0.0],
+            "p049p": [5.0, 5.0],
+        }
+    )
+
+    result = consumption.generate_lcfs_table(person, household)
+
+    assert len(result) == 1
+    assert result["region"].iloc[0] == "LONDON"
+    assert result["tenure_type"].iloc[0] == "OWNED_WITH_MORTGAGE"
+    assert result["accommodation_type"].iloc[0] == "HOUSE_SEMI_DETACHED"
+    assert result["employment_income"].iloc[0] == 300.0 * WEEKS_IN_YEAR
+    assert result["household_weight"].iloc[0] == 500
+    assert (
+        result["domestic_energy_consumption"].iloc[0]
+        == result["electricity_consumption"].iloc[0] + result["gas_consumption"].iloc[0]
+    )
+    assert (
+        result[consumption.PREDICTOR_VARIABLES + consumption.IMPUTATIONS]
+        .notna()
+        .all()
+        .all()
+    )
diff --git a/policyengine_uk_data/tests/test_private_releases.py b/policyengine_uk_data/tests/test_private_releases.py
index dc84bd01c..a3429b439 100644
--- a/policyengine_uk_data/tests/test_private_releases.py
+++ b/policyengine_uk_data/tests/test_private_releases.py
@@ -63,6 +63,9 @@ def test_etb_model_metadata_tracks_private_release():
     from policyengine_uk_data.datasets.imputations.services.etb import (
         get_public_services_model_metadata,
     )
+    from policyengine_uk_data.datasets.imputations.services.services import (
+        ETB_SURVEY_YEAR,
+    )
     from policyengine_uk_data.datasets.imputations.vat import (
         DEFAULT_ETB_YEAR,
         get_vat_model_metadata,
@@ -72,5 +75,6 @@ def test_etb_model_metadata_tracks_private_release():
     services_metadata = get_public_services_model_metadata()
 
     assert DEFAULT_ETB_YEAR == CURRENT_ETB_RELEASE.default_training_year
+    assert ETB_SURVEY_YEAR == CURRENT_ETB_RELEASE.default_training_year
     assert vat_metadata["etb_release_name"] == CURRENT_ETB_RELEASE.name
     assert services_metadata["etb_release_name"] == CURRENT_ETB_RELEASE.name

From 5c3d0bcf285b2b58d8334743676d7713beb0fcd3 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Sun, 24 May 2026 09:49:12 -0400
Subject: [PATCH 4/4] Use locked uv environment in data CI

---
 .github/workflows/pull_request.yaml | 10 +++++-----
 .github/workflows/push.yaml         | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml
index 158864a2e..1c4eccc17 100644
--- a/.github/workflows/pull_request.yaml
+++ b/.github/workflows/pull_request.yaml
@@ -64,14 +64,14 @@ jobs:
         uses: actions/setup-python@v6
         with:
           python-version: 3.13
-      - name: Install package
-        run: uv pip install -e ".[dev]" --system
+      - name: Sync locked environment
+        run: uv sync --frozen --all-extras
       - name: Download data inputs
-        run: make download
+        run: uv run --frozen make download
         env:
           HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
       - name: Build datasets
-        run: make data
+        run: uv run --frozen make data
         env:
           TESTING: "1"
       - name: Save calibration log (constituencies)
@@ -87,4 +87,4 @@ jobs:
           path: la_calibration_log.csv
 
       - name: Run tests
-        run: make test
+        run: uv run --frozen make test
diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
index 973f5e91e..45143e274 100644
--- a/.github/workflows/push.yaml
+++ b/.github/workflows/push.yaml
@@ -52,14 +52,14 @@ jobs:
         with:
           workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider"
           service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com"
-      - name: Install package
-        run: uv pip install -e ".[dev]" --system
+      - name: Sync locked environment
+        run: uv sync --frozen --all-extras
       - name: Download data inputs
-        run: make download
+        run: uv run --frozen make download
         env:
           HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
       - name: Build datasets
-        run: make data
+        run: uv run --frozen make data
       - name: Save calibration log (constituencies)
         uses: actions/upload-artifact@v7
         with:
@@ -72,9 +72,9 @@ jobs:
           name: la_calibration_log.csv
           path: la_calibration_log.csv
       - name: Run tests
-        run: make test
+        run: uv run --frozen make test
       - name: Upload data
-        run: make upload
+        run: uv run --frozen make upload
         env:
           HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
       - name: Publish a git tag