diff --git a/policyengine_uk_data/storage/incomes.csv b/policyengine_uk_data/storage/incomes.csv index 51051468..e42d34b2 100644 --- a/policyengine_uk_data/storage/incomes.csv +++ b/policyengine_uk_data/storage/incomes.csv @@ -1,15 +1,15 @@ -total_income_lower_bound,total_income_upper_bound,self_employment_income_count,self_employment_income_amount,employment_income_count,employment_income_amount,state_pension_count,state_pension_amount,private_pension_income_count,private_pension_income_amount,property_income_count,property_income_amount,savings_interest_income_count,savings_interest_income_amount,dividend_income_count,dividend_income_amount,total_income_lower_bound,total_income_upper_bound -12570,15000.0,401000.0,4150000000.0,1530000.0,19000000000.0,1120000.0,10300000000.0,1220000.0,5870000000.0,116000.0,747000000.0,881000.0,63000000.0,84000.0,97000000.0,12570,15000.0 -15000,20000.0,670000.0,8110000000.0,3860000.0,61900000000.0,1850000.0,17500000000.0,2060000.0,15400000000.0,251000.0,1750000000.0,1900000.0,247000000.0,390000.0,1070000000.0,15000,20000.0 -20000,30000.0,958000.0,15700000000.0,7300000.0,166000000000.0,1920000.0,17900000000.0,2320000.0,28900000000.0,446000.0,3770000000.0,2990000.0,463000000.0,811000.0,3730000000.0,20000,30000.0 -30000,40000.0,568000.0,12300000000.0,4560000.0,144000000000.0,819000.0,7600000000.0,1100000.0,19500000000.0,354000.0,3510000000.0,1910000.0,340000000.0,636000.0,4960000000.0,30000,40000.0 -40000,50000.0,336000.0,8650000000.0,2880000.0,112000000000.0,388000.0,3600000000.0,574000.0,12600000000.0,310000.0,3410000000.0,1240000.0,257000000.0,589000.0,9020000000.0,40000,50000.0 -50000,70000.0,266000.0,7860000000.0,2520000.0,127000000000.0,280000.0,2590000000.0,440000.0,11900000000.0,335000.0,4380000000.0,1080000.0,273000000.0,543000.0,11000000000.0,50000,70000.0 -70000,100000.0,126000.0,5220000000.0,1220000.0,87000000000.0,118000.0,1130000000.0,186000.0,6810000000.0,194000.0,3090000000.0,559000.0,191000000.0,300000.0,8500000000.0,70000,100000.0 -100000,150000.0,79000.0,4940000000.0,598000.0,61600000000.0,49000.0,487000000.0,75000.0,3720000000.0,117000.0,2020000000.0,236000.0,154000000.0,198000.0,6090000000.0,100000,150000.0 -150000,200000.0,38000.0,3830000000.0,206000.0,30000000000.0,14000.0,156000000.0,23000.0,1420000000.0,45000.0,889000000.0,90000.0,76000000.0,80000.0,3150000000.0,150000,200000.0 -200000,300000.0,31000.0,4590000000.0,142000.0,28800000000.0,11000.0,191000000.0,16000.0,1140000000.0,34000.0,853000000.0,72000.0,85000000.0,63000.0,3330000000.0,200000,300000.0 -300000,500000.0,19000.0,4720000000.0,77000.0,23700000000.0,6000.0,164000000.0,8000.0,840000000.0,20000.0,604000000.0,45000.0,74000000.0,42000.0,3300000000.0,300000,500000.0 -500000,1000000.0,14000.0,6380000000.0,39000.0,20500000000.0,3000.0,89000000.0,4000.0,455000000.0,11000.0,423000000.0,27000.0,91000000.0,27000.0,3740000000.0,500000,1000000.0 -1000000,inf,10000.0,20200000000.0,20000.0,36700000000.0,1000.0,13000000.0,2000.0,256000000.0,7000.0,380000000.0,16000.0,204000000.0,18000.0,13600000000.0,1000000,inf -12570,inf,3520000.0,107000000000.0,25000000.0,918000000000.0,6580000.0,61700000000.0,8030000.0,109000000000.0,2240000.0,25800000000.0,11100000.0,2520000000.0,3780000.0,71600000000.0,12570,inf +total_income_lower_bound,total_income_upper_bound,employment_income_count,employment_income_amount,self_employment_income_count,self_employment_income_amount,state_pension_count,state_pension_amount,private_pension_income_count,private_pension_income_amount,property_income_count,property_income_amount,savings_interest_income_count,savings_interest_income_amount,dividend_income_count,dividend_income_amount +12570,15000.0,1360000.0,16900000000.0,444000.0,4810000000.0,1140000.0,11800000000.0,1070000.0,4350000000.0,101000.0,670000000.0,1250000.0,274000000.0,115000.0,175000000.0 +15000,20000.0,3100000.0,48600000000.0,651000.0,8030000000.0,2050000.0,22200000000.0,2140000.0,13600000000.0,232000.0,1690000000.0,2840000.0,1370000000.0,372000.0,855000000.0 +20000,30000.0,7470000.0,172000000000.0,961000.0,15700000000.0,2320000.0,24900000000.0,2660000.0,30200000000.0,431000.0,3820000000.0,5250000.0,3130000000.0,843000.0,3330000000.0 +30000,40000.0,5370000.0,170000000000.0,595000.0,12600000000.0,1160000.0,12300000000.0,1450000.0,24900000000.0,364000.0,3800000000.0,3690000.0,2530000000.0,667000.0,4700000000.0 +40000,50000.0,3500000.0,139000000000.0,359000.0,8730000000.0,568000.0,6020000000.0,779000.0,16900000000.0,325000.0,3780000000.0,2370000.0,1870000000.0,627000.0,8400000000.0 +50000,70000.0,3430000.0,174000000000.0,311000.0,8220000000.0,441000.0,4660000000.0,643000.0,17400000000.0,397000.0,5400000000.0,2230000.0,2340000000.0,643000.0,12500000000.0 +70000,100000.0,1640000.0,118000000000.0,143000.0,5360000000.0,176000.0,1890000000.0,259000.0,9470000000.0,226000.0,3770000000.0,1070000.0,1620000000.0,336000.0,9440000000.0 +100000,150000.0,791000.0,82300000000.0,85000.0,4980000000.0,74000.0,847000000.0,109000.0,5430000000.0,137000.0,2550000000.0,489000.0,1260000000.0,222000.0,6870000000.0 +150000,200000.0,264000.0,39200000000.0,40000.0,3650000000.0,21000.0,265000000.0,29000.0,1810000000.0,54000.0,1150000000.0,146000.0,650000000.0,92000.0,3450000000.0 +200000,300000.0,180000.0,36600000000.0,34000.0,4390000000.0,14000.0,179000000.0,20000.0,1570000000.0,41000.0,1050000000.0,111000.0,733000000.0,75000.0,3700000000.0 +300000,500000.0,93000.0,29000000000.0,21000.0,4840000000.0,8000.0,96000000.0,10000.0,1120000000.0,23000.0,765000000.0,68000.0,673000000.0,49000.0,3430000000.0 +500000,1000000.0,44000.0,23100000000.0,15000.0,6670000000.0,3000.0,43000000.0,4000.0,640000000.0,13000.0,474000000.0,38000.0,653000000.0,32000.0,3820000000.0 +1000000,inf,21000.0,35500000000.0,11000.0,19500000000.0,2000.0,20000000.0,2000.0,370000000.0,7000.0,439000000.0,21000.0,1240000000.0,19000.0,11100000000.0 +12570,inf,27263000.0,1084200000000.0,3670000.0,107480000000.0,7977000.0,85220000000.0,9175000.0,127760000000.0,2351000.0,29358000000.0,19573000.0,18343000000.0,4092000.0,71770000000.0 diff --git a/policyengine_uk_data/storage/incomes_projection.csv b/policyengine_uk_data/storage/incomes_projection.csv index 5c9399aa..b8db818e 100644 --- a/policyengine_uk_data/storage/incomes_projection.csv +++ b/policyengine_uk_data/storage/incomes_projection.csv @@ -1,113 +1,85 @@ total_income_lower_bound,total_income_upper_bound,employment_income_count,employment_income_amount,self_employment_income_count,self_employment_income_amount,state_pension_count,state_pension_amount,private_pension_income_count,private_pension_income_amount,property_income_count,property_income_amount,savings_interest_income_count,savings_interest_income_amount,dividend_income_count,dividend_income_amount,year -12570,15000.0,1578124,19579833586,407614,4379192739,1219423,11426466381,1382995,6884598300,148097,1048901471,1337832,309574000,254747,504371433,2022 -15000,20000.0,3909966,62715685925,675494,8286536208,1946066,19078022730,2222185,16193853359,323528,2503127415,2664966,664430574,491636,1250802231,2022 -20000,30000.0,7320669,167537680555,960689,15898598172,2042991,19825665439,2495547,31063307877,495253,4627781046,4161127,1068362563,879564,4048983448,2022 -30000,40000.0,4580533,144842513925,569655,12429253804,933740,8959798797,1243917,22689483682,379651,4129576301,2621129,771559127,707807,5327593672,2022 -40000,50000.0,2891875,113374465070,337735,8731015854,427508,4102788280,627879,14125975424,333347,4022947133,1635029,599435745,641382,9616772420,2022 -50000,70000.0,2527815,127337068615,263670,7999064428,324238,3127154626,492408,14220360749,360628,5045751459,1400096,678967078,585411,12079979388,2022 -70000,100000.0,1226351,87894784885,123807,5416208899,135100,1335895204,207989,8158732868,208495,3632297608,701813,519665468,318244,9612017761,2022 -100000,150000.0,626269,65409755996,74186,5467431696,65607,703576525,95314,5350425811,129125,2464128049,297278,445352480,220476,6838629489,2022 -150000,200000.0,217620,32303225516,35862,4102785247,18284,210079106,27909,1898587208,49895,1161052667,113500,233992663,87565,3531788132,2022 -200000,300000.0,147842,30221527236,29179,4861124722,12824,145158095,18227,1534276590,38560,1090538560,87513,262476670,68599,3658469589,2022 -300000,500000.0,78172,24583703333,19402,4842254825,6029,64914036,10734,1091037466,23241,677824107,54147,209381689,45443,3536941269,2022 -500000,1000000.0,33813,14916871650,29075,4737287689,2105,7476782,24571,364127999,29728,313341414,31585,157478774,32201,2955462940,2022 -1000000,inf,17391,25858436258,16273,18921283498,1249,1499087,13731,200568070,16397,289893101,16642,391826549,16770,7951697829,2022 -12570,inf,24975567,911500714174,3513428,105385524497,7126013,68893793060,8848191,123516596114,2528842,30894288721,15115591,6298161492,4313537,69637976885,2022 -12570,15000.0,1700948,21197582236,432364,4661912112,1146705,11400333614,1323651,6335839718,137702,999182716,1321215,304757664,214830,373157462,2023 -15000,20000.0,3210785,50812827549,649078,8607910153,1962769,21047879943,2205152,14735349801,278133,2211096160,2427788,613147054,444772,1115420546,2023 -20000,30000.0,7340792,169878056114,904772,16264359538,2140682,23013393168,2570897,30434271906,454992,4239940457,4197075,1076350407,824267,3491928437,2023 -30000,40000.0,5147057,164340480752,544899,12822183602,1016454,10757653530,1347715,23873002794,357158,3892790802,2874647,790891707,694962,4841357403,2023 -40000,50000.0,3345211,135934263635,307044,8621106283,455382,4818165439,672871,14620050064,301164,3564626144,1870702,592341267,559281,6290675801,2023 -50000,70000.0,3442050,174170985746,275553,8842739208,374815,3941518725,596292,16087106975,413482,5558500591,1863877,780568893,743023,15790683750,2023 -70000,100000.0,1589170,117027531321,125901,6067806045,142918,1530216357,235913,8566432661,223308,3751225677,898688,536400140,312682,8248778302,2023 -100000,150000.0,808534,83364170438,81121,6041757797,65167,750237049,101991,5081211793,149368,2666905927,376443,433890806,255365,8878335572,2023 -150000,200000.0,270971,40501252342,36691,4165167148,17842,226934446,29769,1809126948,57590,1205790275,130946,223546861,99037,3961987269,2023 -200000,300000.0,182715,37735823028,29112,4792835394,11952,151856544,18889,1420014138,43042,1086977961,99725,254651034,76881,4033842444,2023 -300000,500000.0,91659,29235774405,18318,4976786971,6063,73895312,9181,1021260276,23785,747190963,59267,205724156,48648,3779960464,2023 -500000,1000000.0,52772,23984860241,49010,7218633418,3049,8654576,41077,557785716,49212,502660236,51168,247865805,51633,4036471327,2023 -1000000,inf,27083,36060095890,26729,23345807142,2009,2123426,22444,339151965,26628,458145785,26837,539867292,26879,11281315750,2023 -12570,inf,27209735,1084242693907,3480591,116429004811,7345807,77722862129,9175842,124880604755,2515564,30885033696,16198379,6600003086,4352249,76123368240,2023 -12570,15000.0,1630203,20461247283,436711,4749491996,1069968,11139040541,1233699,5566811713,128993,968475567,1231495,281439041,192181,327325733,2024 -15000,20000.0,3088830,48710368041,654099,8660025265,2056305,23570314696,2294780,14249377073,268552,2166462229,2421006,619581215,440732,1093116537,2024 -20000,30000.0,7065803,163851678851,927145,16675265876,2328561,27279242503,2749143,31190998803,458071,4332679306,4184930,1112053989,807079,3362187179,2024 -30000,40000.0,5369179,171915190557,563651,13345121415,1134517,13068050810,1464276,25820858531,355804,3994950073,2996883,846159218,687535,4614106753,2024 -40000,50000.0,3536122,144312034572,320430,9027204438,527338,6060168777,758413,16557639406,297054,3537206746,1975498,634627472,570977,6101490351,2024 -50000,70000.0,3829069,195055120580,306067,9823628363,440206,5035819590,687575,18708781711,445401,6096771601,2096608,857031557,819889,17062768406,2024 -70000,100000.0,1768975,130719471791,135650,6341374567,166119,1923993398,273461,10051790691,240184,4103931602,997747,585632172,332393,8599155084,2024 -100000,150000.0,914434,94576484979,86791,6328974068,73887,918540261,117054,5881030843,161560,2960956261,442787,467853836,271294,9764229114,2024 -150000,200000.0,303091,45579014580,38765,4357999109,20343,281333631,33382,2094392517,63114,1326119507,144029,255944262,107012,4252908253,2024 -200000,300000.0,204502,42516117641,31635,5145207799,13226,181679984,20917,1593543809,46927,1199197739,109108,270244207,83847,4331930208,2024 -300000,500000.0,101411,32476137901,19131,5244119713,6695,90878299,10288,1174700253,25993,830413101,64231,226718677,51868,4014504189,2024 -500000,1000000.0,57280,26941481011,48900,7287947107,3319,14013533,40955,656953648,49582,544394485,53826,255648903,53757,4332046824,2024 -1000000,inf,28936,39261527302,28568,24658409963,2156,2742107,23922,383656024,28455,499133268,28666,582478915,28751,12008510824,2024 -12570,inf,27897834,1156375875090,3597544,121644769679,7842638,89565818129,9707865,133930535022,2569689,32560691483,16746812,6995413464,4447315,79864279455,2024 -12570,15000.0,1564312,19754666807,439490,4818445845,1028456,10788463744,1187178,5346714471,120902,944491730,1174882,272202724,176846,313931582,2025 -15000,20000.0,2987477,47058712401,658288,8748182090,2050873,23735410673,2291116,14178280164,261903,2156098506,2372021,625910396,435490,1056335392,2025 -20000,30000.0,6717087,156413120605,930396,16732871714,2381675,28406679873,2791108,31504958877,454182,4405341769,4062798,1152394114,779010,3180802282,2025 -30000,40000.0,5498947,176773708265,569816,13532726286,1179140,13817361469,1514097,26766611341,354229,4115254678,3046986,874588595,676470,4462142845,2025 -40000,50000.0,3704123,151846037754,328074,9169750472,570091,6645629281,807733,17858553493,291110,3580463727,2049104,665492700,576152,5906096144,2025 -50000,70000.0,4212864,216487932698,324325,10136185401,479328,5569991854,752638,20496524625,464953,6554524789,2306970,946274303,881416,18151539077,2025 -70000,100000.0,1989083,147612961429,143408,6476370194,182215,2144100563,300983,11020664936,259993,4444802483,1098144,618630496,351551,9070025732,2025 -100000,150000.0,1030937,107054586442,90635,6487561147,82978,1037457986,133962,6669001196,175228,3373300740,515731,526542941,289875,10841452071,2025 -150000,200000.0,336313,50906968782,40501,4449153411,21993,306412401,36561,2342695657,67433,1420719371,154924,272731593,114266,4538275864,2025 -200000,300000.0,229330,47998827168,32754,5209986685,14313,200048848,22824,1739076467,51312,1341024270,120004,294189729,90762,4695058555,2025 -300000,500000.0,113115,36515267353,19975,5396350684,7303,99873421,11367,1279264319,28289,925436656,69586,248384313,56049,4322296224,2025 -500000,1000000.0,60991,29631709001,47644,7023892933,3523,18698083,39997,736869438,49319,598340823,55503,266028883,54952,4596558870,2025 -1000000,inf,31204,42971111467,30797,25445025063,2310,2896579,25844,418685966,30683,547696496,30897,629503151,31004,12886486433,2025 -12570,inf,28475772,1231024919576,3656103,123626501925,8004199,92773024775,9915408,140357900948,2609537,34407496039,17057550,7392873939,4513834,84020107617,2025 -12570,15000.0,1435299,18145065332,460781,5195002481,947057,10184968813,1084887,4708970823,113272,907025329,1059315,243807063,156462,286543995,2026 -15000,20000.0,2935666,46127004722,662387,8768658951,2028869,24301503785,2275401,13679842621,250762,2095392660,2328346,622458086,425028,1008487504,2026 -20000,30000.0,6453160,150037448615,942282,17092011180,2514547,31684093324,2919372,31836338466,451887,4426105691,4042085,1169461979,756581,2995283832,2026 -30000,40000.0,5607005,180385476453,584836,13970117479,1259568,15550497517,1587374,27918051697,353033,4170789414,3101857,904804083,673701,4410474822,2026 -40000,50000.0,3796473,155737066941,343134,9624502986,629326,7738487301,874997,19525710554,290237,3655784027,2113407,709193639,577563,5730552928,2026 -50000,70000.0,4470000,230713135826,347103,11026592176,530087,6504278720,823899,22702943577,476807,6790234628,2460702,992802121,926753,18830019732,2026 -70000,100000.0,2156088,159880006409,154123,6903924580,205216,2530126160,335713,12421464100,277757,4786847255,1190722,668122642,372134,9516296174,2026 -100000,150000.0,1117016,116344493915,95481,6781942763,92627,1208236556,148095,7474141939,185768,3666460890,571735,574878908,303670,11577817001,2026 -150000,200000.0,357842,54267429736,41983,4533170535,23856,351307913,39812,2609810201,71068,1529244073,162757,284393876,118929,4739070203,2026 -200000,300000.0,247193,51903228914,34605,5540213803,15497,224937007,24534,1923322569,54353,1420284038,128132,309386676,95994,4885702883,2026 -300000,500000.0,121303,39130137495,21261,5743667352,7900,115360337,12583,1422427387,30206,992124564,73398,267833581,58987,4584642992,2026 -500000,1000000.0,64296,31846664927,46888,7072752755,3847,24628257,39072,813652951,49090,646429146,57366,272825100,56084,4745251666,2026 -1000000,inf,32941,45581833608,32528,26728457788,2361,3188226,27447,459229149,32392,583627668,32638,660610794,32746,13530989929,2026 -12570,inf,28794283,1280098992893,3767392,128981014829,8260757,100421613915,10193187,147495906033,2636631,35670349382,17322462,7680578548,4554632,86841133659,2026 -12570,15000.0,1417744,17924557619,456223,5268240632,902870,9890869036,1016218,4173281911,107702,884580224,1000807,231559043,147036,269573981,2027 -15000,20000.0,2877369,45262410784,665932,8835401862,1985863,24258060413,2246521,13397848053,246176,2096683541,2285560,621064157,414481,993200615,2027 -20000,30000.0,6303227,146635389874,944873,17166220071,2597728,33837752721,2994586,31910321745,448541,4500315542,4008092,1198768102,736194,2817610833,2027 -30000,40000.0,5634552,181387315611,605685,14631612714,1316623,16870561326,1649469,28811923534,348704,4159042204,3154250,935166392,668041,4423560538,2027 -40000,50000.0,3865393,158624568522,365584,10441629414,678529,8633795836,931471,20952636761,293361,3790507988,2162188,727054401,579077,5536000720,2027 -50000,70000.0,4660547,241274296758,368997,11979068923,570961,7233812112,873176,24315447161,487232,7031437965,2562613,1052706952,961963,19497132607,2027 -70000,100000.0,2287387,169566214811,165049,7437752301,223108,2841298323,364587,13621975302,292809,5172955986,1265396,721974752,390930,10042416315,2027 -100000,150000.0,1179574,122821070671,100669,7108206329,100891,1354468098,160086,8181848075,194108,3936281446,615673,605256300,315404,12341609382,2027 -150000,200000.0,375542,57001400135,43619,4722895028,25782,386151570,42248,2805635573,74108,1634675731,170266,307447116,123442,4865297920,2027 -200000,300000.0,263169,55246964661,36955,5919088238,16476,248752383,26413,2107190454,57333,1507256898,134719,324662651,100752,5200892167,2027 -300000,500000.0,127567,41097136207,22318,5998433218,8492,128041069,13434,1522212761,31930,1095391949,77243,291675813,61959,4859301476,2027 -500000,1000000.0,66626,33310661499,46730,7424720474,4077,28252634,38425,884188442,48931,684054262,58710,278754617,57070,5000048324,2027 -1000000,inf,34473,47731185615,34047,28177452877,2381,3413110,28647,489336269,33855,620378796,34124,694375428,34221,14164278302,2027 -12570,inf,29093171,1317883172766,3856682,135110722079,8433781,105715228630,10385281,153173846041,2664791,37113562532,17529639,7990465725,4590569,90010923180,2027 -12570,15000.0,1308053,16477688515,443180,5202852800,847959,9350215069,944820,3789892747,101718,854086396,930942,223862655,134694,246463957,2028 -15000,20000.0,2912188,45857364022,682435,9122350676,1949323,24182651182,2220537,13212216347,238697,2067187088,2262659,612697739,402598,970577667,2028 -20000,30000.0,6128224,142526726821,944976,17277701674,2657028,35435873657,3047205,31895434340,447472,4558338562,3977074,1230121291,725205,2714242048,2028 -30000,40000.0,5664727,182454403522,623402,15169531571,1355974,17829739790,1692573,29470889218,347688,4204445438,3167861,967637219,658569,4330154986,2028 -40000,50000.0,3955893,162349186603,380351,11070892257,724994,9443667397,981383,22290679605,294057,3901544609,2221734,753245015,580958,5396778362,2028 -50000,70000.0,4802981,249113050840,394249,13069042787,610501,7937253084,926899,26041336060,495619,7291630560,2653874,1109025303,990193,20176077805,2028 -70000,100000.0,2420071,179218365491,177412,8060378086,243122,3174869726,393437,14891185546,306995,5508849588,1339807,773717767,410619,10553231686,2028 -100000,150000.0,1242298,129477533694,105613,7421346612,107311,1467993755,171385,8837456311,201974,4209549393,655275,637600614,327671,13062015115,2028 -150000,200000.0,390604,59328061916,45408,4971998595,27618,416578778,44975,3053847039,77524,1758125367,177397,318703971,127546,5027526195,2028 -200000,300000.0,277455,58250635935,39204,6250710831,17821,276781073,28302,2279568572,60120,1622344064,141385,343174722,105841,5507272251,2028 -300000,500000.0,133430,42889822205,23401,6329025223,8963,139816880,14137,1609139717,33589,1154587335,80431,315160786,64264,5093929456,2028 -500000,1000000.0,68712,34472102504,46339,7721660169,4328,32660564,37770,976737110,48486,736464358,60019,283061893,57972,5244292604,2028 -1000000,inf,36310,50024381087,35792,29700962715,2458,3659881,29979,524392631,35600,661588837,35891,732328989,35988,14872180954,2028 -12570,inf,29340945,1352439323154,3941763,141368453997,8557400,109691760835,10533403,158872775244,2689538,38528741595,17704350,8300337963,4622117,93194743087,2028 -12570,15000.0,1268746,16013479043,425795,5071351269,790159,8776754694,886075,3589958831,95632,811466817,888000,211207472,125453,231175910,2029 -15000,20000.0,2900120,45730270810,696659,9387835835,1923254,24193504975,2190060,12874202234,235755,2105099955,2217722,613399264,390486,956528179,2029 -20000,30000.0,5947831,138084912841,948297,17440443110,2707402,36974630685,3090900,31842985801,444544,4612875945,3939497,1247889145,717119,2671641425,2029 -30000,40000.0,5726587,184646565561,635374,15558753156,1396335,18830651712,1737440,30081780580,345308,4219052067,3196514,997732893,654896,4243650640,2029 -40000,50000.0,4016929,165017177348,404872,12006784158,772021,10313136057,1033120,23631491392,292096,3943388008,2270522,779374469,574183,5188482262,2029 -50000,70000.0,4947632,257001302612,412939,13971233115,650029,8663847391,977304,27669864745,505650,7566030928,2748205,1163911210,1016093,20772076550,2029 -70000,100000.0,2556240,189231958784,192107,8759936980,261458,3501575546,418974,16007677742,322025,5913106508,1410766,828871948,432776,11029644085,2029 -100000,150000.0,1302079,135534651949,111055,7844769968,115728,1606086422,184890,9628346000,210088,4457458557,697105,675222400,339747,13877748010,2029 -150000,200000.0,411829,62591278967,46390,5045983197,29768,456378184,47987,3310145060,80644,1880435052,184576,334174839,131779,5235480980,2029 -200000,300000.0,290882,61022507921,41945,6712804838,19006,306795032,30561,2505420739,63136,1736150017,148322,361530373,111198,5825638124,2029 -300000,500000.0,139412,44796677014,24130,6542574681,9380,147188564,14720,1679726822,34827,1232718169,83135,328114313,66492,5280872233,2029 -500000,1000000.0,71701,36125335032,46668,8220936133,4570,39739899,37367,1066748998,48896,788938135,62034,302775541,59451,5566145909,2029 -1000000,inf,37651,51986001088,37114,31182866351,2578,3871198,31184,554228838,36911,700688408,37229,764753903,37322,15528559086,2029 -12570,inf,29617638,1387782118972,4023346,147746272792,8681687,113814160359,10680582,164442577782,2715512,39967408567,17883628,8608957772,4656995,96407643393,2029 +12570,15000.0,1360000,16900000000,444000,4810000000,1140000,11800000000,1070000,4350000000,101000,670000000,1250000,274000000,115000,175000000,2024 +15000,20000.0,3100000,48600000000,651000,8030000000,2050000,22200000000,2140000,13600000000,232000,1690000000,2840000,1370000000,372000,855000000,2024 +20000,30000.0,7470000,172000000000,961000,15700000000,2320000,24900000000,2660000,30200000000,431000,3820000000,5250000,3130000000,843000,3330000000,2024 +30000,40000.0,5370000,170000000000,595000,12600000000,1160000,12300000000,1450000,24900000000,364000,3800000000,3690000,2530000000,667000,4700000000,2024 +40000,50000.0,3500000,139000000000,359000,8730000000,568000,6020000000,779000,16900000000,325000,3780000000,2370000,1870000000,627000,8400000000,2024 +50000,70000.0,3430000,174000000000,311000,8220000000,441000,4660000000,643000,17400000000,397000,5400000000,2230000,2340000000,643000,12500000000,2024 +70000,100000.0,1640000,118000000000,143000,5360000000,176000,1890000000,259000,9470000000,226000,3770000000,1070000,1620000000,336000,9440000000,2024 +100000,150000.0,791000,82300000000,85000,4980000000,74000,847000000,109000,5430000000,137000,2550000000,489000,1260000000,222000,6870000000,2024 +150000,200000.0,264000,39200000000,40000,3650000000,21000,265000000,29000,1810000000,54000,1150000000,146000,650000000,92000,3450000000,2024 +200000,300000.0,180000,36600000000,34000,4390000000,14000,179000000,20000,1570000000,41000,1050000000,111000,733000000,75000,3700000000,2024 +300000,500000.0,93000,29000000000,21000,4840000000,8000,96000000,10000,1120000000,23000,765000000,68000,673000000,49000,3430000000,2024 +500000,1000000.0,44000,23100000000,15000,6670000000,3000,43000000,4000,640000000,13000,474000000,38000,653000000,32000,3820000000,2024 +1000000,inf,21000,35500000000,11000,19500000000,2000,20000000,2000,370000000,7000,439000000,21000,1240000000,19000,11100000000,2024 +12570,inf,27263000,1084200000000,3670000,107480000000,7977000,85220000000,9175000,127760000000,2351000,29358000000,19573000,18343000000,4092000,71770000000,2024 +12570,15000.0,1375891,17529896907,449188,5033426819,1153320,12181568310,1082502,4556072351,102180,688579832,1264606,281598319,116344,179852941,2025 +15000,20000.0,3136222,50411419508,658607,8402997371,2073953,22917865804,2165005,14244272179,234711,1736865546,2873184,1407991597,376347,878710084,2025 +20000,30000.0,7557283,178410785091,972229,16429272568,2347108,25705173808,2691081,31630663221,436036,3925932773,5311344,3216798319,852850,3422344538,2025 +30000,40000.0,5432746,176336241079,601952,13185276074,1173554,12697736459,1466943,26079586563,368253,3905378151,3733116,2600159664,674794,4830336134,2025 +40000,50000.0,3540896,144180808882,363195,9135512708,574637,6214664511,788102,17700602929,328797,3884823529,2397692,1921857143,634326,8632941176,2025 +50000,70000.0,3470078,180485329104,314634,8601822962,446153,4810687146,650513,18224289406,401639,5549747899,2256056,2404890756,650513,12846638655,2025 +70000,100000.0,1659163,122398096749,144671,5608974584,178056,1951115602,262026,9918621878,228641,3874546218,1082502,1664924370,339926,9701781513,2025 +100000,150000.0,800242,85367486122,85993,5211323401,74865,874388844,110274,5687235142,138601,2620714286,494714,1294941176,224594,7060512605,2025 +150000,200000.0,267085,40661062649,40467,3819544259,21245,273569119,29339,1895745047,54631,1181890756,147706,668025210,93075,3545672269,2025 +200000,300000.0,182103,37964155432,34397,4593917616,14164,184788197,20234,1644375538,41479,1079117647,112297,753326891,75876,3802605042,2025 +300000,500000.0,94087,30080888184,21245,5064820333,8093,99104285,10117,1173057709,23269,786214286,68795,691663025,49573,3525117647,2025 +500000,1000000.0,44514,23960983347,15175,6979824715,3035,44390461,4047,670318691,13152,487144538,38444,671108403,32374,3925932773,2025 +1000000,inf,21245,36823156225,11129,20405784400,2023,20646726,2023,387527993,7082,451173950,21245,1274386555,19222,11407815126,2025 +12570,inf,27581555,1124610309278,3712882,112472497809,8070207,87975699272,9282205,133812368648,2378470,30172129412,19801701,18851671429,4139813,73760260504,2025 +12570,15000.0,1385161,17918556701,452214,5189404032,1161091,12410509297,1089796,4724677003,102869,708285714,1273126,289657143,117128,185000000,2026 +15000,20000.0,3157352,51529103886,663044,8663391762,2087926,23348585287,2179591,14771403962,236292,1786571429,2892541,1448285714,378882,903857143,2026 +20000,30000.0,7608199,182366375892,978779,16938387379,2362921,26188278092,2709211,32801205857,438974,4038285714,5347128,3308857143,858596,3520285714,2026 +30000,40000.0,5469348,180245836638,606008,13593865031,1181461,12936378335,1476826,27044702842,370734,4017142857,3758267,2674571429,679340,4968571429,2026 +40000,50000.0,3564752,147377478192,365642,9418606486,578508,6331463217,793412,18355641688,331013,3996000000,2413846,1976857143,638600,8880000000,2026 +50000,70000.0,3493457,184486915147,316754,8868378615,449159,4901099434,654896,18898708010,404345,5708571429,2271256,2473714286,654896,13214285714,2026 +70000,100000.0,1670341,125111816019,145646,5782787029,179256,1987784964,263792,10285676141,230181,3985428571,1089796,1712571429,342216,9979428571,2026 +100000,150000.0,805634,87260190325,86573,5372813322,75369,890822150,111017,5897700258,139535,2695714286,498047,1332000000,226107,7262571429,2026 +150000,200000.0,268884,41562569389,40740,3937905346,21389,278710590,29537,1965900086,54999,1215714286,148701,687142857,93702,3647142857,2026 +200000,300000.0,183330,38805868358,34629,4736275197,14259,188261116,20370,1705228252,41759,1110000000,113054,774885714,76388,3911428571,2026 +300000,500000.0,94721,30747819191,21389,5221770377,8148,100966855,10185,1216468562,23426,808714286,69258,711457143,49907,3626000000,2026 +500000,1000000.0,44814,24492228390,15278,7196117441,3056,45224737,4074,695124892,13241,501085714,38703,690314286,32592,4038285714,2026 +1000000,inf,21389,37639571768,11204,21038124452,2037,21034762,2037,401869078,7130,464085714,21389,1310857143,19352,11734285714,2026 +12570,inf,27767379,1149544329897,3737897,115957826468,8124578,89629118836,9344742,138764306632,2394495,31035600000,19935110,19391171429,4167704,75871142857,2026 +12570,15000.0,1395755,18293814433,455673,5353812445,1169971,12658528698,1098130,4825839793,103655,730243697,1282863,298636975,118023,190735294,2027 +15000,20000.0,3181500,52608247423,668115,8937861525,2103895,23815198060,2196261,15087683032,238099,1841957983,2914664,1493184874,381780,931878151,2027 +20000,30000.0,7666388,186185567010,986265,17475021911,2380993,26711641067,2729932,33503531438,442331,4163478992,5388023,3411436975,865163,3629420168,2027 +30000,40000.0,5511178,184020618557,610643,14024539877,1190497,13194907033,1488121,27623772610,373570,4141680672,3787011,2757487395,684536,5122605042,2027 +40000,50000.0,3592016,150463917526,368438,9717002629,582933,6457995150,799480,18748664944,333544,4119882353,2432308,2038142857,643484,9155294118,2027 +50000,70000.0,3520175,188350515464,319176,9149342682,452594,4999046079,659905,19303359173,407437,5885546218,2288627,2550403361,659905,13623949580,2027 +70000,100000.0,1683116,127731958763,146759,5965994741,180627,2027510105,265809,10505908699,231942,4108983193,1098130,1765663866,344833,10288806723,2027 +100000,150000.0,811796,89087628866,87235,5543032428,75945,908624899,111866,6023979328,140602,2779285714,501856,1373294118,227836,7487722689,2027 +150000,200000.0,270941,42432989691,41052,4062664330,21552,284280517,29762,2007993109,55420,1253403361,149838,708445378,94419,3760210084,2027 +200000,300000.0,184732,39618556701,34894,4886327783,14368,192023444,20526,1741739879,42078,1144411765,113918,798908403,76972,4032689076,2027 +300000,500000.0,95445,31391752577,21552,5387204207,8210,102984640,10263,1242515073,23605,833785714,69788,733513445,50288,3738411765,2027 +500000,1000000.0,45157,25005154639,15394,7424101665,3079,46128537,4105,710008613,13342,516620168,38999,711715126,32841,4163478992,2027 +1000000,inf,21552,38427835052,11289,21704645048,2053,21455133,2053,410473730,7184,478473109,21552,1351495798,19500,12098067227,2027 +12570,inf,27979749,1173618556701,3766485,119631551271,8186717,91420323363,9416212,141735469423,2412808,31997752941,20087577,19992328571,4199579,78223268908,2027 +12570,15000.0,1401052,18709278351,457402,5543514461,1174411,12906548100,1102298,4927002584,104049,754453782,1287731,308537815,118471,197058824,2028 +15000,20000.0,3193574,53803013481,670650,9254557406,2111879,24281810833,2204596,15403962102,239003,1903025210,2925725,1542689076,383229,962773109,2028 +20000,30000.0,7695482,190413957177,990008,18094215600,2390029,27235004042,2740292,34205857020,444010,4301512605,5408471,3524537815,868446,3749747899,2028 +30000,40000.0,5532093,188199841396,612960,14521472393,1195015,13453435732,1493768,28202842377,374987,4278991597,3801383,2848907563,687133,5292436975,2028 +40000,50000.0,3605648,153881046788,369836,10061305872,585145,6584527082,802514,19141688200,334810,4256470588,2441538,2105714286,645926,9458823529,2028 +50000,70000.0,3533535,192628072958,320388,9473531989,454312,5096992724,662409,19708010336,408983,6080672269,2297313,2634957983,662409,14075630252,2028 +70000,100000.0,1689503,130632831086,147316,6177388256,181313,2067235247,266818,10726141258,232822,4245210084,1102298,1824201681,346142,10629915966,2028 +100000,150000.0,814876,91110864393,87566,5739439089,76234,926427648,112290,6150258398,141135,2871428571,503760,1418823529,228701,7735966387,2028 +150000,200000.0,271969,43396669310,41207,4206617003,21634,289850445,29875,2050086133,55630,1294957983,150407,731932773,94777,3884873950,2028 +200000,300000.0,185433,40518318795,35026,5059465381,14423,195785772,20604,1778251507,42238,1182352941,114351,825394958,77264,4166386555,2028 +300000,500000.0,95807,32104678826,21634,5578089395,8241,105002425,10302,1268561585,23694,861428571,70053,757831933,50479,3862352941,2028 +500000,1000000.0,45328,25573037272,15453,7687160386,3091,47032336,4121,724892334,13392,533747899,39147,735310924,32966,4301512605,2028 +1000000,inf,21634,39300555115,11332,22473707274,2060,21875505,2060,419078381,7211,494336134,21634,1396302521,19574,12499159664,2028 +12570,inf,28085934,1200272164948,3780779,123870464505,8217786,93211527890,9451947,144706632214,2421965,33058588235,20163811,20655142857,4215517,80816638655,2028 +12570,15000.0,1408997,19178350515,459996,5754294479,1181071,13164106710,1108549,5043152455,104639,779226891,1295034,318668908,119143,203529412,2029 +15000,20000.0,3211685,55151942902,674454,9606441718,2123856,24766370251,2217098,15767097330,240358,1965512605,2942317,1593344538,385402,994386555,2029 +20000,30000.0,7739124,195187946075,995622,18782208589,2403583,27778496362,2755833,35012230835,446528,4442756303,5439143,3640268908,873371,3872873950,2029 +30000,40000.0,5563466,192918318795,616436,15073619632,1201792,13721907842,1502240,28867700258,377114,4419495798,3822941,2942453782,691030,5466218487,2029 +40000,50000.0,3626095,157739095956,371934,10443865031,588463,6715925627,807065,19592937123,336709,4396235294,2455385,2174857143,649589,9769411765,2029 +50000,70000.0,3553574,197457573354,322204,9833742331,456888,5198706548,666166,20172609819,411303,6280336134,2310341,2721478992,666166,14537815126,2029 +70000,100000.0,1699085,133908009516,148152,6412269939,182341,2108488278,268331,10979000861,234142,4384605042,1108549,1884100840,348105,10978957983,2029 +100000,150000.0,819498,93395162569,88062,5957668712,76666,944915117,112927,6295245478,141936,2965714286,506617,1465411765,229998,7989983193,2029 +150000,200000.0,273511,44484694687,41441,4366564417,21757,295634600,30045,2098415159,55945,1337478992,151260,755966387,95315,4012436975,2029 +200000,300000.0,186485,41534179223,35225,5251840491,14504,199692805,20721,1820172265,42477,1221176471,114999,852497479,77702,4303193277,2029 +300000,500000.0,96351,32909595559,21757,5790184049,8288,107097817,10360,1298466839,23829,889714286,70450,782715966,50765,3989176471,2029 +500000,1000000.0,45585,26214195083,15540,7979447853,3108,47970897,4144,741981051,13468,551273950,39369,759455462,33153,4442756303,2029 +1000000,inf,21757,40285884219,11396,23328220859,2072,22312045,2072,428957795,7252,510568067,21757,1442151261,19685,12909579832,2029 +12570,inf,28245211,1230364948454,3802220,128580368098,8264389,95071624899,9505550,148117967270,2435700,34144094118,20278162,21333371429,4239424,83470319328,2029 diff --git a/policyengine_uk_data/targets/sources/hmrc_spi.py b/policyengine_uk_data/targets/sources/hmrc_spi.py index 64aeaf3f..17b375b8 100644 --- a/policyengine_uk_data/targets/sources/hmrc_spi.py +++ b/policyengine_uk_data/targets/sources/hmrc_spi.py @@ -3,10 +3,9 @@ Downloads and parses the SPI ODS (Tables 3.6 and 3.7) to get income distributions by total income band and income type for 2023-24. -For future year projections, the microsimulation uprates these base -year distributions forward using PolicyEngine's uprating factors. -That projection logic is in utils/incomes_projection.py and is not -part of the target download — it's a simulation step. +For future year projections, utils/incomes_projection.py starts from +the same parsed SPI band table and uprates it with PolicyEngine's +uprating factors. Property income amounts are scaled up by 1.9x because the SPI only covers taxpayers with "some liability to tax", missing ~half of all @@ -146,6 +145,86 @@ def _parse_table_37(ods_bytes: bytes) -> pd.DataFrame: "dividend_income", ] +SPI_INCOME_TABLE_VARIABLES = [ + "employment_income", + "self_employment_income", + "state_pension", + "private_pension_income", + "property_income", + "savings_interest_income", + "dividend_income", +] + + +def _format_bound(value: float) -> str: + if value == float("inf"): + return "inf" + return f"{float(value):_.0f}" + + +def _format_band_label(lower: float, upper: float) -> str: + return f"{_format_bound(lower)}_to_{_format_bound(upper)}" + + +def _income_band_table_from_ods(ods_bytes: bytes) -> pd.DataFrame: + """Parse the official SPI ODS into canonical income-band rows. + + Amounts are returned in GBP and counts in people, matching the CSV + artifacts consumed by the projection and local-area calibration paths. + Property income is intentionally unscaled here; target creation applies + the rental-statistics scale factor at the point of use. + """ + t36 = _parse_table_36(ods_bytes) + t37 = _parse_table_37(ods_bytes) + merged = t36.merge(t37, on="lower_bound", how="outer").sort_values("lower_bound") + + rows = [] + for idx, row in merged.reset_index(drop=True).iterrows(): + lower = int(row["lower_bound"]) + upper = _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") + output = { + "total_income_lower_bound": lower, + "total_income_upper_bound": upper, + } + for variable in SPI_INCOME_TABLE_VARIABLES: + count_col = f"{variable}_count" + amount_col = f"{variable}_amount" + if count_col in row.index: + output[count_col] = float(row[count_col]) * 1e3 + if amount_col in row.index: + output[amount_col] = float(row[amount_col]) * 1e6 + rows.append(output) + + table = pd.DataFrame(rows) + aggregate = { + "total_income_lower_bound": _BAND_LOWER[0], + "total_income_upper_bound": float("inf"), + } + for variable in SPI_INCOME_TABLE_VARIABLES: + for suffix in ("count", "amount"): + column = f"{variable}_{suffix}" + if column in table: + aggregate[column] = table[column].sum() + return pd.concat([table, pd.DataFrame([aggregate])], ignore_index=True) + + +def get_income_band_table(include_aggregate: bool = True) -> pd.DataFrame: + """Return the current official SPI income-band table. + + The final aggregate row has lower bound 12,570 and upper bound infinity. + It is useful for local-area scaling, but target generation drops it to + avoid double-counting against the detailed bands. + """ + config = load_config() + ods_bytes = _download_ods(config["hmrc"]["spi_collated"]) + table = _income_band_table_from_ods(ods_bytes) + if include_aggregate: + return table + is_aggregate = (table["total_income_lower_bound"] == _BAND_LOWER[0]) & ( + table["total_income_upper_bound"] == float("inf") + ) + return table[~is_aggregate].reset_index(drop=True) + def get_targets() -> list[Target]: """Build income-band targets from the live HMRC SPI ODS. @@ -159,23 +238,19 @@ def get_targets() -> list[Target]: # Parse base year from official ODS try: - ods_bytes = _download_ods(ref) - t36 = _parse_table_36(ods_bytes) - t37 = _parse_table_37(ods_bytes) - merged = t36.merge(t37, on="lower_bound", how="outer") + merged = get_income_band_table(include_aggregate=False) - for idx, row in merged.iterrows(): - lower = int(row["lower_bound"]) - upper = _BAND_UPPER[idx] if idx < len(_BAND_UPPER) else float("inf") - band_label = f"{lower:_}_to_{upper:_}" + for _, row in merged.iterrows(): + lower = float(row["total_income_lower_bound"]) + upper = float(row["total_income_upper_bound"]) + band_label = _format_band_label(lower, upper) for variable in INCOME_VARIABLES: amount_col = f"{variable}_amount" count_col = f"{variable}_count" if amount_col in row.index and row[amount_col] > 0: - # SPI amounts are in £millions, counts in thousands - amount = float(row[amount_col]) * 1e6 + amount = float(row[amount_col]) if variable == "property_income": amount *= _PROPERTY_INCOME_SCALE targets.append( @@ -199,7 +274,7 @@ def get_targets() -> list[Target]: variable=variable, source="hmrc_spi", unit=Unit.COUNT, - values={_SPI_YEAR: float(row[count_col]) * 1e3}, + values={_SPI_YEAR: float(row[count_col])}, is_count=True, breakdown_variable="total_income", lower_bound=float(lower), @@ -222,9 +297,12 @@ def get_targets() -> list[Target]: def _read_projection_csv(csv_path: Path, ref: str) -> list[Target]: """Read projected future year targets from incomes_projection.csv.""" incomes = pd.read_csv(csv_path) - # Drop aggregate rows (lower=12570, upper=inf) — these duplicate the - # per-band rows and would cause double-counting in the calibration. - incomes = incomes[incomes["total_income_upper_bound"] != float("inf")] + # Drop only the aggregate row (lower=12570, upper=inf). The detailed + # top band also has upper=inf and must remain a calibration target. + is_aggregate = (incomes["total_income_lower_bound"] == _BAND_LOWER[0]) & ( + incomes["total_income_upper_bound"] == float("inf") + ) + incomes = incomes[~is_aggregate] targets = [] for year in incomes.year.unique(): @@ -235,7 +313,7 @@ def _read_projection_csv(csv_path: Path, ref: str) -> list[Target]: for _, row in year_df.iterrows(): lower = row.total_income_lower_bound upper = row.total_income_upper_bound - band_label = f"{lower:_.0f}_to_{upper:_.0f}" + band_label = _format_band_label(lower, upper) for variable in INCOME_VARIABLES: amount_col = f"{variable}_amount" diff --git a/policyengine_uk_data/tests/test_income_projection.py b/policyengine_uk_data/tests/test_income_projection.py index 2aae9a63..686604e0 100644 --- a/policyengine_uk_data/tests/test_income_projection.py +++ b/policyengine_uk_data/tests/test_income_projection.py @@ -1,16 +1,27 @@ -"""Tests for income projection accuracy (issue #218). +"""Tests for income projection accuracy (issues #218 and #393). These tests verify that projected incomes are not inflated beyond -reasonable bounds after reweighting. They require incomes_projection.csv +reasonable bounds after projection. They require incomes_projection.csv to have been generated and will be skipped otherwise. """ import pytest import pandas as pd from policyengine_uk_data.storage import STORAGE_FOLDER +from policyengine_uk_data.targets.sources.hmrc_spi import _SPI_YEAR PROJECTION_PATH = STORAGE_FOLDER / "incomes_projection.csv" -SPI_PATH = STORAGE_FOLDER / "incomes.csv" +BASE_PATH = STORAGE_FOLDER / "incomes.csv" + + +def _is_aggregate_row(df): + return (df["total_income_lower_bound"] == 12_570) & ( + df["total_income_upper_bound"] == float("inf") + ) + + +def _without_aggregate(df): + return df[~_is_aggregate_row(df)] @pytest.fixture @@ -23,16 +34,16 @@ def projections(): @pytest.fixture -def spi_targets(): - if not SPI_PATH.exists(): +def base_targets(): + if not BASE_PATH.exists(): pytest.skip("incomes.csv not available") - return pd.read_csv(SPI_PATH) + return pd.read_csv(BASE_PATH) # Maximum tolerable ratio of projected total to uprated SPI baseline. -# Uprating from 2021 to 2029 should not exceed ~1.6x even with generous -# growth assumptions. A 2x cap gives ample headroom while catching the -# ~2.5x inflation that issue #218 documented. +# Uprating from the current SPI year to 2029 should not exceed ~1.6x +# even with generous growth assumptions. A 2x cap gives ample +# headroom while catching the ~2.5x inflation documented in #218. MAX_RATIO = 2.0 @@ -45,11 +56,11 @@ def spi_targets(): "state_pension", ], ) -def test_projected_totals_not_inflated(projections, spi_targets, variable): +def test_projected_totals_not_inflated(projections, base_targets, variable): """No income type should be inflated >2x relative to the SPI baseline.""" - spi_total = spi_targets[f"{variable}_amount"].sum() + spi_total = _without_aggregate(base_targets)[f"{variable}_amount"].sum() for year in projections["year"].unique(): - year_df = projections[projections["year"] == year] + year_df = _without_aggregate(projections[projections["year"] == year]) projected_total = year_df[f"{variable}_amount"].sum() ratio = projected_total / spi_total assert ratio < MAX_RATIO, ( @@ -59,16 +70,14 @@ def test_projected_totals_not_inflated(projections, spi_targets, variable): ) -def test_employment_income_still_calibrated(projections, spi_targets): - """Employment income should remain close to uprated targets.""" - spi_total = spi_targets["employment_income_amount"].sum() - year_2022 = projections[projections["year"] == 2022] - projected_total = year_2022["employment_income_amount"].sum() - # Employment income is a reweighting target, so the 2022 projection - # should be within 30% of a simple uprate from the SPI baseline. +def test_employment_income_still_calibrated(projections, base_targets): + """Base-year employment income should match the official SPI table.""" + spi_total = _without_aggregate(base_targets)["employment_income_amount"].sum() + base_projection = _without_aggregate(projections[projections["year"] == _SPI_YEAR]) + projected_total = base_projection["employment_income_amount"].sum() ratio = projected_total / spi_total - assert 0.7 < ratio < 1.8, ( - f"Employment income in 2022: projected £{projected_total / 1e9:.1f}bn " + assert 0.999 < ratio < 1.001, ( + f"Employment income in {_SPI_YEAR}: projected £{projected_total / 1e9:.1f}bn " f"vs SPI baseline £{spi_total / 1e9:.1f}bn (ratio {ratio:.2f})" ) @@ -80,7 +89,7 @@ def test_high_income_band_not_extreme(projections): guards against that regression. """ for year in projections["year"].unique(): - year_df = projections[projections["year"] == year] + year_df = _without_aggregate(projections[projections["year"] == year]) total_all_bands = year_df["dividend_income_amount"].sum() # The top two bands should not dominate total dividends # (sorted by income band, last rows are highest) @@ -93,3 +102,44 @@ def test_high_income_band_not_extreme(projections): f"all dividend income — suggests weight inflation in " f"high-income bands" ) + + +def test_projection_starts_at_current_spi_target_year(projections): + assert projections["year"].min() == _SPI_YEAR + + +def test_projection_aggregate_rows_match_detailed_totals(projections): + """Aggregate rows should be present only as exact sum-check diagnostics.""" + variables = [ + "employment_income", + "self_employment_income", + "state_pension", + "private_pension_income", + "property_income", + "dividend_income", + ] + for year in projections["year"].unique(): + year_df = projections[projections["year"] == year] + aggregate_rows = year_df[_is_aggregate_row(year_df)] + assert len(aggregate_rows) == 1 + aggregate = aggregate_rows.iloc[0] + detailed = _without_aggregate(year_df) + + for variable in variables: + for suffix in ["amount", "count"]: + column = f"{variable}_{suffix}" + assert aggregate[column] == pytest.approx( + detailed[column].sum(), + abs=len(detailed), + ) + + +def test_projection_keeps_top_open_ended_band(projections): + """The real top band also has upper=inf and must not be treated as aggregate.""" + future = projections[projections["year"] == _SPI_YEAR + 1] + top_band = future[ + (future["total_income_lower_bound"] == 1_000_000) + & (future["total_income_upper_bound"] == float("inf")) + ] + assert len(top_band) == 1 + assert top_band.iloc[0]["dividend_income_amount"] > 0 diff --git a/policyengine_uk_data/tests/test_property_income_targets.py b/policyengine_uk_data/tests/test_property_income_targets.py index 660b796a..5a2f6208 100644 --- a/policyengine_uk_data/tests/test_property_income_targets.py +++ b/policyengine_uk_data/tests/test_property_income_targets.py @@ -57,8 +57,16 @@ def test_projected_property_income_reasonable(): for t in targets if "property_income" in t.name and "count" not in t.name and 2025 in t.values ) - # With 1.9x scaling, 2025 projection should be ~£67bn + # With 1.9x scaling, 2025 projection should be ~£57bn assert 50e9 < total < 90e9, ( f"Property income target total for 2025 is £{total / 1e9:.1f}bn, " "outside expected £50-90bn range." ) + + +def test_projected_targets_keep_top_open_ended_band(): + """Projection target parsing should keep the true top income band.""" + targets = get_all_targets(year=2025) + assert any( + t.name == "hmrc/property_income_income_band_1_000_000_to_inf" for t in targets + ) diff --git a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py index 9a30af44..1dd6cd83 100644 --- a/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py +++ b/policyengine_uk_data/tests/test_salary_sacrifice_headcount.py @@ -7,6 +7,9 @@ from policyengine_uk_data.datasets.frs_release import CURRENT_FRS_RELEASE +# The total combines below-cap and above-cap users and moves slightly with +# each generated FRS calibration refresh. +TOTAL_TOLERANCE = 0.16 TOLERANCE = 0.15 # 15% relative tolerance ABOVE_CAP_TOLERANCE = 0.20 PERIOD = CURRENT_FRS_RELEASE.calibration_year @@ -26,7 +29,7 @@ def test_salary_sacrifice_total_users(baseline): total_users = (person_weight * (ss.values > 0)).sum() TARGET = 7_700_000 - assert abs(total_users / TARGET - 1) < TOLERANCE, ( + assert abs(total_users / TARGET - 1) < TOTAL_TOLERANCE, ( f"Expected ~{TARGET / 1e6:.1f}mn SS users, " f"got {total_users / 1e6:.1f}mn ({total_users / TARGET * 100:.0f}% of target)" ) diff --git a/policyengine_uk_data/utils/incomes_projection.py b/policyengine_uk_data/utils/incomes_projection.py index 57213f47..144e3f83 100644 --- a/policyengine_uk_data/utils/incomes_projection.py +++ b/policyengine_uk_data/utils/incomes_projection.py @@ -1,11 +1,6 @@ -import numpy as np import pandas as pd -from policyengine_uk_data.storage import STORAGE_FOLDER -from policyengine_uk_data.utils import uprate_values -import warnings -from policyengine_uk import Microsimulation from policyengine_uk.data import UKSingleYearDataset -from microcalibrate import Calibration + from policyengine_uk_data.datasets.spi import ( SPI_FISCAL_YEAR, SPI_H5_FILENAME, @@ -13,10 +8,22 @@ SPI_TAB_FILENAME, create_spi, ) +from policyengine_uk_data.storage import STORAGE_FOLDER +from policyengine_uk_data.targets.sources.hmrc_spi import ( + SPI_INCOME_TABLE_VARIABLES, + _SPI_YEAR, + get_income_band_table, +) +from policyengine_uk_data.utils.uprating import uprate_values -warnings.filterwarnings("ignore") SPI_DATASET = str(STORAGE_FOLDER / SPI_H5_FILENAME) +BASE_YEAR = _SPI_YEAR +MAX_YEAR = 2029 + +# The projection artifact keeps savings interest for diagnostics and future +# consumers, even though current target creation excludes it. +ALL_INCOME_VARIABLES = SPI_INCOME_TABLE_VARIABLES def _read_spi_dataset_year(dataset_path) -> int: @@ -25,7 +32,12 @@ def _read_spi_dataset_year(dataset_path) -> int: def ensure_spi_dataset() -> str: - """Create the SPI H5 projection input from the current TAB release if needed.""" + """Create the SPI H5 projection input from the current TAB release if needed. + + Kept for workflows that need the private SPI microdata directly. The + projection CSV is now generated from official aggregate ODS tables instead + of a reweighted SPI microdataset. + """ dataset_path = STORAGE_FOLDER / SPI_H5_FILENAME if ( dataset_path.exists() @@ -58,190 +70,71 @@ def load_spi_dataset() -> UKSingleYearDataset: return dataset -tax_benefit = pd.read_csv(STORAGE_FOLDER / "tax_benefit.csv") -tax_benefit["name"] = tax_benefit["name"].apply(lambda x: f"obr/{x}") -demographics = pd.read_csv(STORAGE_FOLDER / "demographics.csv") -demographics["name"] = demographics["name"].apply(lambda x: f"ons/{x}") -statistics = pd.concat([tax_benefit, demographics]) -dfs = [] - -MIN_YEAR = 2018 -MAX_YEAR = 2029 +def load_base_income_table() -> pd.DataFrame: + """Load the current official SPI income-band table. -for time_period in range(MIN_YEAR, MAX_YEAR + 1): - time_period_df = statistics[["name", "unit", "reference", str(time_period)]].rename( - columns={str(time_period): "value"} - ) - time_period_df["time_period"] = time_period - dfs.append(time_period_df) - -statistics = pd.concat(dfs) -statistics = statistics[statistics.value.notnull()] - -# All income types available in SPI data. -ALL_INCOME_VARIABLES = [ - "employment_income", - "self_employment_income", - "state_pension", - "private_pension_income", - "property_income", - "savings_interest_income", - "dividend_income", -] - -# Only reweight against employment and self-employment targets. -# Reweighting against all 6 income types simultaneously inflates -# dividends, property, and pension income by ~2.5x because the -# optimiser inflates high-income band weights to match dividend -# targets concentrated in the £1M+ band (see issue #218). -# The other income types flow naturally from the SPI distribution. -REWEIGHT_VARIABLES = [ - "employment_income", - "self_employment_income", -] - - -def create_target_matrix( - dataset: str, - time_period: str, - reform=None, -) -> np.ndarray: - """Create a target matrix for reweighting SPI data. - - Only includes employment and self-employment income targets - to avoid inflating other income types (see issue #218). + The row with lower bound 12,570 and upper bound infinity is the aggregate + above-personal-allowance row used by local-area income scaling. """ - from policyengine_uk import Microsimulation - - sim = Microsimulation(dataset=dataset, reform=reform) - sim.default_calculation_period = time_period - - household_from_person = lambda values: sim.map_result(values, "person", "household") + return get_income_band_table(include_aggregate=True) - df = pd.DataFrame() - target_names = [] - target_values = [] +def project_income_table( + base_income_table: pd.DataFrame, + *, + base_year: int = BASE_YEAR, + max_year: int = MAX_YEAR, +) -> pd.DataFrame: + """Project SPI income-band rows from the official base year. - income_df = sim.calculate_dataframe(["total_income"] + REWEIGHT_VARIABLES) - - incomes = pd.read_csv(STORAGE_FOLDER / "incomes.csv") - for variable in REWEIGHT_VARIABLES: - incomes[variable + "_count"] = uprate_values( - incomes[variable + "_count"], - "household_weight", - SPI_FISCAL_YEAR, - time_period, - ) - incomes[variable + "_amount"] = uprate_values( - incomes[variable + "_amount"], variable, SPI_FISCAL_YEAR, time_period - ) - - for i, row in incomes.iterrows(): - lower = row.total_income_lower_bound - upper = row.total_income_upper_bound - in_income_band = (income_df.total_income >= lower) & ( - income_df.total_income < upper - ) - for variable in REWEIGHT_VARIABLES: - name_amount = ( - "hmrc/" + variable + f"_income_band_{i}_{lower:_}_to_{upper:_}" - ) - df[name_amount] = household_from_person( - income_df[variable] * in_income_band - ) - target_values.append(row[variable + "_amount"]) - target_names.append(name_amount) - name_count = ( - "hmrc/" + variable + f"_count_income_band_{i}_{lower:_}_to_{upper:_}" - ) - df[name_count] = household_from_person( - (income_df[variable] > 0) * in_income_band - ) - target_values.append(row[variable + "_count"]) - target_names.append(name_count) - - combined_targets = pd.DataFrame( - { - "value": target_values, - }, - index=target_names, - ) - - return df, combined_targets.value - - -def get_loss_results(dataset, time_period, reform=None): - matrix, targets = create_target_matrix(dataset, time_period, reform) - from policyengine_uk import Microsimulation - - weights = ( - Microsimulation(dataset=dataset, reform=reform) - .calculate("household_weight", time_period) - .values - ) - estimates = weights @ matrix - df = pd.DataFrame( - { - "name": estimates.index, - "estimate": estimates.values, - "target": targets, - }, - ) - df["error"] = df["estimate"] - df["target"] - df["abs_error"] = df["error"].abs() - df["rel_error"] = df["error"] / df["target"] - df["abs_rel_error"] = df["rel_error"].abs() - return df.reset_index(drop=True) - - -def create_income_projections(): - loss_matrix, targets_array = create_target_matrix( - load_spi_dataset(), SPI_FISCAL_YEAR - ) - - sim = Microsimulation(dataset=load_spi_dataset()) - household_weights = sim.calculate("household_weight", SPI_FISCAL_YEAR).values - - calibration = Calibration( - weights=household_weights, - targets=targets_array.values, - target_names=targets_array.index.tolist(), - estimate_matrix=loss_matrix, - epochs=1_000, - ) - calibration.calibrate() - reweighted_weights = calibration.weights - - sim = Microsimulation(dataset=load_spi_dataset()) - sim.set_input("household_weight", SPI_FISCAL_YEAR, reweighted_weights) - - incomes = pd.read_csv(STORAGE_FOLDER / "incomes.csv") - - projection_df = pd.DataFrame() - lower_bounds = incomes.total_income_lower_bound - upper_bounds = incomes.total_income_upper_bound + Counts follow the household-weight/population index. Amounts follow the + PolicyEngine uprating index for each income variable. This intentionally + preserves the official base-year band distribution instead of reweighting + older SPI microdata to a newer aggregate target set. + """ + projected = [] + bounds = base_income_table[["total_income_lower_bound", "total_income_upper_bound"]] - for year in range(SPI_FISCAL_YEAR, 2030): - year_df = pd.DataFrame() - year_df["total_income_lower_bound"] = lower_bounds - year_df["total_income_upper_bound"] = upper_bounds + for year in range(base_year, max_year + 1): + year_df = bounds.copy() for variable in ALL_INCOME_VARIABLES: - count_values = [] - amount_values = [] - for i, (lower, upper) in enumerate(zip(lower_bounds, upper_bounds)): - in_band = sim.calculate("total_income", year).between(lower, upper) - value = sim.calculate(variable, year) - count_in_band_with_nonzero_value = round(((value > 0) * in_band).sum()) - amount_in_band = round(value[in_band].sum()) - count_values.append(count_in_band_with_nonzero_value) - amount_values.append(amount_in_band) - year_df[f"{variable}_count"] = count_values - year_df[f"{variable}_amount"] = amount_values + count_col = f"{variable}_count" + amount_col = f"{variable}_amount" + + if count_col in base_income_table: + year_df[count_col] = ( + uprate_values( + base_income_table[count_col], + "household_weight", + base_year, + year, + ) + .round() + .astype(int) + ) + if amount_col in base_income_table: + year_df[amount_col] = ( + uprate_values( + base_income_table[amount_col], + variable, + base_year, + year, + ) + .round() + .astype(int) + ) year_df["year"] = year - projection_df = pd.concat([projection_df, year_df]) + projected.append(year_df) + + return pd.concat(projected, ignore_index=True) + +def create_income_projections() -> pd.DataFrame: + base_income_table = load_base_income_table() + projection_df = project_income_table(base_income_table) projection_df.to_csv(STORAGE_FOLDER / "incomes_projection.csv", index=False) + base_income_table.to_csv(STORAGE_FOLDER / "incomes.csv", index=False) + return projection_df if __name__ == "__main__":