Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 13 additions & 20 deletions policyengine_uk_data/datasets/local_areas/local_authorities/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
- Council tax bands A-H: VOA Council Tax Stock of Properties (per LA)
- Council tax £ paid (net of CTR): MHCLG taxbase × Band D (England),
Welsh Government Council Tax Income (Wales)

Missing-source policy: local target cells stay NaN when no direct LA
source is available. The local-area calibrator masks those cells out of
the local loss. National targets are supplied by a separate national
target matrix, so this module should not fabricate local targets by
allocating national totals across missing-source LAs.
"""

from policyengine_uk import Microsimulation
Expand Down Expand Up @@ -55,7 +61,6 @@ def create_local_authority_target_matrix(

sim = Microsimulation(dataset=dataset, reform=reform)
sim.default_calculation_period = time_period
original_weights = sim.calculate("household_weight", time_period).values

matrix = pd.DataFrame()
y = pd.DataFrame()
Expand Down Expand Up @@ -154,31 +159,20 @@ def create_local_authority_target_matrix(
has_ons_data = (
ons_merged["net_income_bhc"].notna() & ons_merged["households"].notna()
).values
total_households = ons_merged["households"].sum()
la_household_share = np.where(
ons_merged["households"].notna(),
ons_merged["households"].values / total_households,
1 / len(la_codes),
)

national_bhc = (original_weights * hbai_net_income).sum()
national_ahc = (original_weights * hbai_net_income_ahc).sum()
national_hc = (original_weights * housing_costs).sum()

y["ons/equiv_net_income_bhc"] = np.where(
has_ons_data,
ons_merged["equiv_net_income_bhc_target"].values,
national_bhc * la_household_share,
np.nan,
)
y["ons/equiv_net_income_ahc"] = np.where(
has_ons_data,
ons_merged["equiv_net_income_ahc_target"].values,
national_ahc * la_household_share,
np.nan,
)
y["ons/equiv_housing_costs"] = np.where(
has_ons_data,
ons_merged["equiv_housing_costs_target"].values,
national_hc * la_household_share,
np.nan,
)

# ── Tenure targets ─────────────────────────────────────────────
Expand Down Expand Up @@ -216,9 +210,10 @@ def create_local_authority_target_matrix(
("social_rent", "social_rent_pct"),
]:
targets = tenure_merged[pct_col] / 100 * tenure_merged["households"]
national = (original_weights * matrix[f"tenure/{tenure_key}"].values).sum()
y[f"tenure/{tenure_key}"] = np.where(
has_tenure, targets.values, national * la_household_share
has_tenure,
targets.values,
np.nan,
)

# ── Private rent amounts ───────────────────────────────────────
Expand Down Expand Up @@ -247,12 +242,10 @@ def create_local_authority_target_matrix(
& tenure_merged["private_rent_pct"].notna()
& tenure_merged["households"].notna()
).values
national_rent = (original_weights * private_rent_amount).sum()

y["rent/private_rent"] = np.where(
has_rent,
tenure_merged["private_rent_target"].values,
national_rent * la_household_share,
np.nan,
)

# ── Council tax band counts (LA targets) ───────────────────────
Expand Down
6 changes: 3 additions & 3 deletions policyengine_uk_data/tests/test_la_loss_council_tax.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def test_band_count_columns_exist_for_every_wired_band():

def test_england_and_wales_have_band_a_to_h_populated():
"""E/W rows should have non-null counts for A-H. If the CSV regresses
to NaN there, the loss matrix will silently fall back to the
national-share estimate and the calibrator loses its real signal."""
to NaN there, the loss matrix will mask the cell and the calibrator
loses its real signal."""
ew = CT_DATA[CT_DATA["country"].isin(["ENGLAND", "WALES"])]
for band in WIRED_BANDS:
non_null = ew[f"count_band_{band}"].notna().sum()
Expand All @@ -62,7 +62,7 @@ def test_england_and_wales_have_band_a_to_h_populated():

def test_scotland_band_counts_are_null_as_documented():
"""Scotland VOA band counts are absent — they should consistently be
NaN so the loss matrix routes them through the fallback."""
NaN so the loss matrix masks them."""
scotland = CT_DATA[CT_DATA["country"] == "SCOTLAND"]
for band in WIRED_BANDS:
assert scotland[f"count_band_{band}"].isna().all(), (
Expand Down
182 changes: 182 additions & 0 deletions policyengine_uk_data/tests/test_la_loss_missing_sources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
from types import SimpleNamespace

import numpy as np
import pandas as pd


class _FakeDataset:
time_period = 2025


class _FakeSim:
def __init__(self, *args, **kwargs):
self.default_calculation_period = 2025

def calculate(self, variable, *args, **kwargs):
values = {
"employment_income": np.array([10_000.0, 30_000.0]),
"income_tax": np.array([1.0, 1.0]),
"age": np.array([40, 70]),
"universal_credit": np.array([0.0, 1.0]),
"equiv_hbai_household_net_income": np.array([20_000.0, 25_000.0]),
"equiv_hbai_household_net_income_ahc": np.array([18_000.0, 22_000.0]),
"tenure_type": np.array(["RENT_PRIVATELY", "OWNED_OUTRIGHT"]),
"benunit_rent": np.array([12_000.0, 0.0]),
"country": np.array(["ENGLAND", "SCOTLAND"]),
}
return type("Result", (), {"values": values[variable]})()

def map_result(self, values, source_entity, target_entity):
return np.asarray(values)


def _fake_la_codes():
return pd.DataFrame(
{
"code": ["E06000001", "W06000001", "S12000001", "N09000001"],
}
)


def _patch_common_la_inputs(monkeypatch, tmp_path):
    """Stub the shared inputs of the LA ``loss`` module and return the module.

    Writes the fake LA code list to ``<tmp_path>/storage`` and then replaces,
    on the imported ``loss`` module, the storage folder, the simulation
    class, the income variable list and the income / age / population / UC /
    uprating / household-count loaders with small in-memory stand-ins.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture used for every replacement.
        tmp_path: pytest temporary directory that receives the fake storage
            folder.

    Returns:
        The patched ``loss`` module.
    """
    from policyengine_uk_data.datasets.local_areas.local_authorities import loss

    storage_dir = tmp_path / "storage"
    storage_dir.mkdir()
    _fake_la_codes().to_csv(storage_dir / "local_authorities_2021.csv", index=False)

    def income_targets():
        return pd.DataFrame(
            {
                "employment_income_amount": [1.0, 1.0, 1.0, 1.0],
                "employment_income_count": [1.0, 1.0, 1.0, 1.0],
            }
        )

    def national_income_projections(year):
        return pd.DataFrame(
            {
                "total_income_lower_bound": [12_570],
                "total_income_upper_bound": [np.inf],
                "employment_income_amount": [4.0],
            }
        )

    def age_targets():
        return pd.DataFrame({"age/0_100": [1.0, 1.0, 1.0, 1.0]})

    def uk_total_population(year):
        return 4.0

    def uc_targets():
        return pd.Series([0, 1, 0, 0])

    def ons_income_uprating_factors(year):
        return (1.0, 1.0)

    def household_counts():
        return pd.DataFrame(
            {
                "la_code": ["E06000001", "W06000001"],
                "households": [100.0, 200.0],
            }
        )

    # Attribute name on ``loss`` -> stand-in, applied in insertion order.
    replacements = {
        "STORAGE_FOLDER": storage_dir,
        "Microsimulation": _FakeSim,
        "INCOME_VARIABLES": ["employment_income"],
        "get_la_income_targets": income_targets,
        "get_national_income_projections": national_income_projections,
        "get_la_age_targets": age_targets,
        "get_uk_total_population": uk_total_population,
        "get_la_uc_targets": uc_targets,
        "get_ons_income_uprating_factors": ons_income_uprating_factors,
        "load_household_counts": household_counts,
    }
    for attribute, stand_in in replacements.items():
        monkeypatch.setattr(loss, attribute, stand_in)
    return loss


def _patch_la_source_loaders(monkeypatch, loss):
    """Stub the ONS income, tenure and private-rent loaders on ``loss``.

    ONS income rows exist for the E and W codes only; tenure and private
    rent rows exist for the E code only. Every other LA code therefore has
    no direct source in these fixtures.
    """
    monkeypatch.setattr(
        loss,
        "load_ons_la_income",
        lambda: pd.DataFrame(
            {
                "la_code": ["E06000001", "W06000001"],
                "net_income_bhc": [30_000.0, 25_000.0],
                "net_income_ahc": [26_000.0, 21_000.0],
            }
        ),
    )
    monkeypatch.setattr(
        loss,
        "load_tenure_data",
        lambda: pd.DataFrame(
            {
                "la_code": ["E06000001"],
                "owned_outright_pct": [30.0],
                "owned_mortgage_pct": [30.0],
                "private_rent_pct": [25.0],
                "social_rent_pct": [15.0],
            }
        ),
    )
    monkeypatch.setattr(
        loss,
        "load_private_rents",
        lambda: pd.DataFrame(
            {"area_code": ["E06000001"], "median_annual_rent": [12_000.0]}
        ),
    )


def test_la_loss_masks_missing_ons_income_cells(monkeypatch, tmp_path):
    """LAs without ONS income data get NaN ONS targets, not a fallback."""
    loss = _patch_common_la_inputs(monkeypatch, tmp_path)
    _patch_la_source_loaders(monkeypatch, loss)

    _, y, _ = loss.create_local_authority_target_matrix(_FakeDataset())

    # Rows 0-1 (E, W) have a direct ONS source; rows 2-3 (S, N) do not.
    direct = y["ons/equiv_net_income_bhc"].iloc[:2]
    assert direct.notna().all()

    # All three ONS columns share one availability mask, so each must be
    # masked on the rows with no ONS source.
    for column in [
        "ons/equiv_net_income_bhc",
        "ons/equiv_net_income_ahc",
        "ons/equiv_housing_costs",
    ]:
        assert y[column].iloc[2:].isna().all(), (
            f"{column}: missing-source cells should be masked"
        )


def test_la_loss_masks_missing_tenure_and_rent_cells(monkeypatch, tmp_path):
    """LAs without tenure / rent data get NaN targets, not a fallback."""
    loss = _patch_common_la_inputs(monkeypatch, tmp_path)
    _patch_la_source_loaders(monkeypatch, loss)

    _, y, _ = loss.create_local_authority_target_matrix(_FakeDataset())

    # Only row 0 (E) has tenure and private-rent source rows.
    for column in [
        "tenure/owned_outright",
        "tenure/owned_mortgage",
        "tenure/private_rent",
        "tenure/social_rent",
        "rent/private_rent",
    ]:
        assert pd.notna(y[column].iloc[0]), f"{column}: direct cell should be finite"
        assert y[column].iloc[1:].isna().all(), (
            f"{column}: missing-source cells should be masked"
        )