Skip to content

Commit cffbc16

Browse files
MaxGhenisclaude
andcommitted
Add end-to-end hierarchical calibration tests
Tests the full hierarchical calibration flow:
- Person-level targets aggregated to household level
- IPF calibration on household weights
- Weight propagation from households to persons
- Mixed household and person-level targets

Key insight: weights are ONLY at household level, so person targets like "count people aged 18-64" must be aggregated to household indicators first:

    sum(hh_weight * count_matching_in_hh) = total_target

11 tests covering:
- Constraint building with person→household aggregation
- IPF convergence and accuracy
- Weight propagation correctness
- Edge cases (infeasible targets, empty strata)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 80144f7 commit cffbc16

1 file changed

Lines changed: 382 additions & 0 deletions

File tree

Lines changed: 382 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,382 @@
1+
"""
2+
End-to-end tests for hierarchical calibration pipeline.
3+
4+
Tests the full flow from:
5+
1. Loading hierarchical microdata (households + persons)
6+
2. Building constraints that aggregate person-level targets to household level
7+
3. Running IPF calibration
8+
4. Verifying calibrated weights match targets
9+
10+
Key insight: Weights are ONLY at household level. Person-level targets like
11+
"count people aged 18-64" must be aggregated to household level first:
12+
sum(hh_weight * count_matching_persons_in_hh) = target_total
13+
"""
14+
15+
import numpy as np
16+
import pandas as pd
17+
import pytest
18+
19+
20+
# Test fixtures
21+
@pytest.fixture
def mock_households():
    """Build a deterministic 500-row household frame for calibration tests.

    Columns:
        household_id: consecutive integers starting at 0.
        state_fips: one of five state codes, drawn with fixed probabilities.
        weight: initial household weight, uniform on [100, 500).
        tenure: 1=own, 2=rent.
        n_persons: household size between 1 and 5.
    """
    # Seeding the global NumPy RNG keeps the fixture reproducible. The draws
    # below are made in a fixed order (state, weight, tenure, size) so the
    # generated values stay stable.
    np.random.seed(42)
    n_hh = 500

    state_codes = ["06", "36", "48", "12", "17"]  # CA, NY, TX, FL, IL
    state_probs = [0.30, 0.15, 0.20, 0.20, 0.15]
    size_options = [1, 2, 3, 4, 5]
    size_probs = [0.25, 0.30, 0.25, 0.15, 0.05]

    state_fips = np.random.choice(state_codes, n_hh, p=state_probs)
    weight = np.random.uniform(100, 500, n_hh)
    tenure = np.random.choice([1, 2], n_hh)  # 1=own, 2=rent
    n_persons = np.random.choice(size_options, n_hh, p=size_probs)

    columns = {
        "household_id": range(n_hh),
        "state_fips": state_fips,
        "weight": weight,
        "tenure": tenure,
        "n_persons": n_persons,
    }
    return pd.DataFrame(columns)
38+
39+
40+
@pytest.fixture
def mock_persons(mock_households):
    """Create mock persons linked to the rows of ``mock_households``.

    Every household contributes exactly ``n_persons`` rows, so the frame has
    ``mock_households["n_persons"].sum()`` rows (on the order of 1,200 for a
    500-household frame averaging roughly 2.45 persons per household).

    Args:
        mock_households: frame with ``household_id`` and ``n_persons``
            columns.

    Returns:
        DataFrame with columns ``person_id``, ``household_id``, ``age``
        (0-94), ``is_employed`` (0/1, only ever 1 for ages 18-64) and
        ``income`` (positive for ages 18-64, otherwise 0).
    """
    np.random.seed(43)
    rows = []

    # Zip the two needed columns instead of using iterrows(): iterrows builds
    # one Series per row and coerces every value to a common dtype, which
    # would make ``n_persons`` unusable in range() for an all-numeric frame.
    household_sizes = zip(
        mock_households["household_id"], mock_households["n_persons"]
    )
    for household_id, n_persons in household_sizes:
        for _ in range(n_persons):
            age = np.random.randint(0, 95)
            is_working_age = 18 <= age < 65
            # Draw order matters for reproducibility: the employment draw is
            # only consumed for working-age persons, then the income draw.
            is_employed = (
                1 if is_working_age and np.random.random() > 0.3 else 0
            )
            income = np.random.lognormal(10, 1) if is_working_age else 0
            rows.append({
                "person_id": len(rows),  # running 0-based counter
                "household_id": household_id,
                "age": age,
                "is_employed": is_employed,
                "income": income,
            })

    return pd.DataFrame(rows)
61+
62+
63+
class TestHierarchicalConstraintBuilding:
    """Test building constraints that aggregate person-level to household-level.

    These tests exercise only the pandas/NumPy aggregation arithmetic; they
    do not call the calibrator.
    """

    def test_person_count_aggregated_to_household(self, mock_households, mock_persons):
        """Person-level count targets should be aggregated to household indicators.

        For the target "count of people aged 18-64" the household indicator
        is ``indicator[i] = number of people aged 18-64 in household i``.
        """
        working_age_mask = (mock_persons["age"] >= 18) & (mock_persons["age"] < 65)
        working_age_persons = mock_persons[working_age_mask]

        # Count per household; households with no matching person get 0.
        counts_per_hh = working_age_persons.groupby("household_id").size()
        expected_indicator = (
            mock_households["household_id"].map(counts_per_hh).fillna(0).values
        )

        # One indicator entry per household, and aggregation must neither
        # drop nor double-count any matching person.
        assert len(expected_indicator) == len(mock_households)
        assert expected_indicator.sum() == working_age_mask.sum()

        # Household-level weighted total: sum(hh_weight * count_per_hh).
        weighted_total = (mock_households["weight"].values * expected_indicator).sum()
        assert weighted_total > 0

        # The same total computed at person level (each matching person
        # carries their household's weight) must agree.
        weight_by_hh = mock_households.set_index("household_id")["weight"]
        person_level_total = (
            working_age_persons["household_id"].map(weight_by_hh).sum()
        )
        np.testing.assert_allclose(weighted_total, person_level_total)

    def test_household_count_is_direct_indicator(self, mock_households, mock_persons):
        """Household-level targets should be direct 0/1 indicators."""
        # Target: count of California households
        ca_mask = mock_households["state_fips"] == "06"
        indicator = ca_mask.astype(float).values

        # All values should be 0 or 1, with one 1 per matching household.
        assert set(np.unique(indicator)).issubset({0.0, 1.0})
        assert indicator.sum() == ca_mask.sum()

        # Weighted count
        weighted_ca_hh = (mock_households["weight"].values * indicator).sum()
        assert weighted_ca_hh > 0
100+
101+
102+
class TestIPFCalibration:
    """Test IPF calibration on hierarchical data."""

    # Household-count targets by state FIPS code, shared by the state tests.
    STATE_TARGETS = {
        "06": 50000,  # CA
        "36": 25000,  # NY
        "48": 33000,  # TX
        "12": 33000,  # FL
        "17": 25000,  # IL
    }

    def test_ipf_converges_on_feasible_targets(self, mock_households, mock_persons):
        """IPF should converge when targets are feasible."""
        from microplex.calibration import Calibrator

        # Simple household-level targets (copied so the class constant is
        # never shared with the calibrator).
        targets = {"state_fips": dict(self.STATE_TARGETS)}

        calibrator = Calibrator(method="ipf", max_iter=100, tol=1e-4)
        calibrator.fit(mock_households, targets)

        assert calibrator.is_fitted_
        assert calibrator.weights_ is not None
        assert len(calibrator.weights_) == len(mock_households)

    def test_calibrated_weights_match_targets(self, mock_households, mock_persons):
        """Calibrated weights should match target totals within tolerance."""
        from microplex.calibration import Calibrator

        targets = {"state_fips": dict(self.STATE_TARGETS)}

        calibrator = Calibrator(method="ipf", max_iter=100, tol=1e-6)
        calibrator.fit(mock_households, targets)

        # Attach the weights to a new frame rather than writing a column into
        # the fixture frame in place.
        calibrated = mock_households.assign(calibrated_weight=calibrator.weights_)

        # Check weighted counts match targets (within 1%).
        for state, target in targets["state_fips"].items():
            in_state = calibrated["state_fips"] == state
            weighted_count = calibrated.loc[in_state, "calibrated_weight"].sum()
            np.testing.assert_allclose(weighted_count, target, rtol=0.01)

    def test_weights_are_positive(self, mock_households, mock_persons):
        """All calibrated weights should be positive."""
        from microplex.calibration import Calibrator

        targets = {"tenure": {1: 100000, 2: 66000}}

        calibrator = Calibrator(method="ipf")
        calibrator.fit(mock_households, targets)

        assert np.all(calibrator.weights_ > 0)
162+
163+
164+
class TestHierarchicalPersonTargets:
    """Test calibrating to person-level targets using household weights."""

    def test_person_count_via_aggregation(self, mock_households, mock_persons):
        """Should calibrate to person-level count via household aggregation.

        The person-level target (number of working-age adults) is expressed
        as a household-level column and then passed to the calibrator as a
        continuous target on that column.
        """
        from microplex.calibration import Calibrator

        # Aggregated indicator: working-age adults (18-64) per household,
        # aligned to the household frame's row order; households with none
        # get 0.
        ages = mock_persons["age"]
        is_working_age = (ages >= 18) & (ages < 65)
        per_household = mock_persons[is_working_age].groupby("household_id").size()
        per_household = per_household.reindex(
            mock_households["household_id"], fill_value=0
        )

        hh_df = mock_households.copy()
        hh_df["n_working_age"] = per_household.values

        # Now calibrate at household level.
        target_working_age = 100_000_000  # 100M working-age adults

        # Use continuous target for the sum
        calibrator = Calibrator(method="ipf")
        calibrator.fit(
            hh_df,
            marginal_targets={},
            continuous_targets={"n_working_age": target_working_age},
        )

        # Verify the weighted sum matches target (within 1%).
        hh_df["calibrated_weight"] = calibrator.weights_
        contributions = hh_df["calibrated_weight"] * hh_df["n_working_age"]
        weighted_sum = contributions.sum()

        np.testing.assert_allclose(weighted_sum, target_working_age, rtol=0.01)
201+
202+
203+
class TestE2EPipeline:
    """End-to-end pipeline tests."""

    @staticmethod
    def _count_persons_per_household(persons, mask, household_ids):
        """Count persons selected by ``mask`` per household.

        Returns an array aligned to ``household_ids``; households without a
        matching person get 0.
        """
        return (
            persons[mask]
            .groupby("household_id")
            .size()
            .reindex(household_ids, fill_value=0)
            .values
        )

    @staticmethod
    def _attach_household_weights(persons, hh_df):
        """Return a copy of ``persons`` with the household weight attached.

        A left merge with ``validate="many_to_one"`` is used so that a
        duplicated household id raises, and a person whose household is
        missing surfaces as a NaN weight instead of being silently dropped.
        """
        merged = persons.merge(
            hh_df[["household_id", "calibrated_weight"]],
            on="household_id",
            how="left",
            validate="many_to_one",
        )
        assert len(merged) == len(persons)
        assert merged["calibrated_weight"].notna().all()
        return merged

    def test_full_pipeline_with_mixed_targets(self, mock_households, mock_persons):
        """Test full pipeline with both household and person-level targets."""
        from microplex.calibration import Calibrator

        # Step 1: Aggregate person-level features to household level
        hh_df = mock_households.copy()

        # Count children (age < 18) per household
        hh_df["n_children"] = self._count_persons_per_household(
            mock_persons, mock_persons["age"] < 18, hh_df["household_id"]
        )

        # Count working-age adults (18-64) per household
        adult_mask = (mock_persons["age"] >= 18) & (mock_persons["age"] < 65)
        hh_df["n_working_age"] = self._count_persons_per_household(
            mock_persons, adult_mask, hh_df["household_id"]
        )

        # Step 2: Define targets
        # Use a consistent scale with household counts as the base: the
        # sample has 500 households and the target population is 1000x that.
        total_hh = 500_000

        # Household-level targets by state
        targets_household = {
            "state_fips": {
                "06": total_hh * 0.30,  # CA: 150k
                "36": total_hh * 0.15,  # NY: 75k
                "48": total_hh * 0.20,  # TX: 100k
                "12": total_hh * 0.20,  # FL: 100k
                "17": total_hh * 0.15,  # IL: 75k
            }
        }

        # Person-level targets (aggregated). Scaling the sample means by the
        # household total keeps them consistent with the household scale.
        targets_person_aggregated = {
            "n_children": total_hh * hh_df["n_children"].mean(),
            "n_working_age": total_hh * hh_df["n_working_age"].mean(),
        }

        # Step 3: Calibrate
        calibrator = Calibrator(method="ipf", max_iter=200, tol=1e-6)
        calibrator.fit(
            hh_df,
            marginal_targets=targets_household,
            continuous_targets=targets_person_aggregated,
        )

        assert calibrator.is_fitted_
        hh_df["calibrated_weight"] = calibrator.weights_

        # Step 4: Verify household-level targets (within 2%)
        for state, target in targets_household["state_fips"].items():
            in_state = hh_df["state_fips"] == state
            weighted_count = hh_df.loc[in_state, "calibrated_weight"].sum()
            np.testing.assert_allclose(weighted_count, target, rtol=0.02)

        # Step 5: Verify person-level targets (via aggregation, within 2%)
        for column, target in targets_person_aggregated.items():
            weighted_total = (hh_df["calibrated_weight"] * hh_df[column]).sum()
            np.testing.assert_allclose(weighted_total, target, rtol=0.02)

    def test_weights_propagate_to_persons(self, mock_households, mock_persons):
        """Household weights should propagate to all persons in that household."""
        from microplex.calibration import Calibrator

        hh_df = mock_households.copy()

        # Simple calibration
        calibrator = Calibrator(method="ipf")
        calibrator.fit(hh_df, {"tenure": {1: 100000, 2: 66000}})

        hh_df["calibrated_weight"] = calibrator.weights_

        # Propagate weights to persons
        persons_df = self._attach_household_weights(mock_persons, hh_df)

        # Verify, for every household, that all of its persons carry one
        # weight and that it is exactly the household's calibrated weight.
        per_hh = persons_df.groupby("household_id")["calibrated_weight"].agg(
            ["min", "max"]
        )
        assert (per_hh["min"] == per_hh["max"]).all()

        expected = hh_df.set_index("household_id")["calibrated_weight"].reindex(
            per_hh.index
        )
        assert (per_hh["min"] == expected).all()

    def test_total_person_weight_respects_household_structure(
        self, mock_households, mock_persons
    ):
        """Total weighted person count should equal sum(hh_weight * n_persons_in_hh)."""
        from microplex.calibration import Calibrator

        hh_df = mock_households.copy()

        # Calibrate
        calibrator = Calibrator(method="ipf")
        calibrator.fit(hh_df, {"tenure": {1: 100000, 2: 66000}})
        hh_df["calibrated_weight"] = calibrator.weights_

        # Compute expected total persons from the household frame alone
        expected_total_persons = (hh_df["calibrated_weight"] * hh_df["n_persons"]).sum()

        # Propagate and sum at person level
        persons_df = self._attach_household_weights(mock_persons, hh_df)
        actual_total_persons = persons_df["calibrated_weight"].sum()

        np.testing.assert_allclose(actual_total_persons, expected_total_persons, rtol=1e-10)
336+
337+
338+
class TestEdgeCases:
    """Test edge cases and error handling."""

    def test_infeasible_targets_handled(self, mock_households, mock_persons):
        """Fitting to very large state targets should not raise.

        Every state is given a target of 1,000,000 households, far above the
        sample's initial weighted total (500 households weighted 100-500).
        """
        from microplex.calibration import Calibrator

        # NOTE(review): with a single categorical margin each stratum total
        # can in principle be reached by rescaling that stratum's weights, so
        # these targets are extreme rather than mutually inconsistent. A
        # second, conflicting margin would be needed to exercise genuinely
        # infeasible targets - confirm the intent.
        infeasible_targets = {
            "state_fips": {
                "06": 1_000_000,
                "36": 1_000_000,
                "48": 1_000_000,
                "12": 1_000_000,
                "17": 1_000_000,
            }
        }

        calibrator = Calibrator(method="ipf", max_iter=50)

        # Should not raise, but may not converge perfectly
        calibrator.fit(mock_households, infeasible_targets)

        # Weights should still be positive and finite. The finiteness check
        # is needed because ``inf > 0`` is true.
        weights = np.asarray(calibrator.weights_, dtype=float)
        assert np.all(np.isfinite(weights))
        assert np.all(weights > 0)

    def test_empty_stratum_handled(self, mock_households, mock_persons):
        """Targets naming a stratum with no matching records should raise ValueError."""
        from microplex.calibration import Calibrator

        # "99" is not a state code present in the data.
        # NOTE(review): these targets also omit three states that ARE in the
        # data ("48", "12", "17"), so the ValueError could come from either
        # condition - confirm which one the calibrator is meant to reject.
        targets = {
            "state_fips": {
                "06": 50000,
                "36": 25000,
                "99": 0,  # No such state in data
            }
        }

        calibrator = Calibrator(method="ipf")

        with pytest.raises(ValueError):
            calibrator.fit(mock_households, targets)

0 commit comments

Comments
 (0)