{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Constituency dataset validation" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "
\n", "
\n", "This is the init_notebook_mode cell from ITables v2.2.1
\n", "(you should not see this message - is your notebook trusted?)\n", "
\n", "
\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
    "from policyengine_uk import Microsimulation\n",
    "import pandas as pd\n",
    "import h5py\n",
    "from itables import init_notebook_mode\n",
    "import itables.options as opt\n",
    "from pathlib import Path\n",
    "from policyengine.utils.huggingface import download\n",
    "from policyengine_uk_data.storage import STORAGE_FOLDER\n",
    "from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import (\n",
    "    create_constituency_target_matrix,\n",
    "    create_national_target_matrix,\n",
    ")\n",
    "\n",
    "opt.maxBytes = \"1MB\"\n",
    "init_notebook_mode(all_interactive=True)\n",
    "\n",
    "REPO = Path(\".\").resolve().parent\n",
    "\n",
    "# Period and dataset under validation, named once so the loads below cannot drift apart.\n",
    "YEAR = 2025\n",
    "DATASET = \"enhanced_frs_2022_23\"\n",
    "\n",
    "# Constituency weights for YEAR, read from the local storage folder.\n",
    "# NOTE(review): weights is multiplied on the left of the target matrix below, so it is\n",
    "# presumably shaped (constituencies x households) - confirm.\n",
    "weights_file_path = STORAGE_FOLDER / \"parliamentary_constituency_weights.h5\"\n",
    "with h5py.File(weights_file_path, \"r\") as f:\n",
    "    weights = f[str(YEAR)][...]\n",
    "\n",
    "# Constituency codes and names; rows are assumed to be in the same order as the rows of weights.\n",
    "constituency_names_file_path = download(\n",
    "    repo=\"policyengine/policyengine-uk-data\",\n",
    "    repo_filename=\"constituencies_2024.csv\",\n",
    "    local_folder=None,\n",
    "    version=None,\n",
    ")\n",
    "constituencies_2024 = pd.read_csv(constituency_names_file_path)\n",
    "\n",
    "# NOTE(review): Microsimulation() is built without an explicit dataset; confirm it matches DATASET.\n",
    "baseline = Microsimulation()\n",
    "household_weights = baseline.calculate(\"household_weight\", YEAR).values\n",
    "\n",
    "constituency_target_matrix, constituency_actuals = create_constituency_target_matrix(DATASET, YEAR, None)\n",
    "national_target_matrix, national_actuals = create_national_target_matrix(DATASET, YEAR, None)\n",
    "\n",
    "# Estimates: constituency weights applied to the target matrix, one row per constituency.\n",
    "constituency_wide = weights @ constituency_target_matrix\n",
    "constituency_wide.index = constituencies_2024.code.values\n",
    "constituency_wide[\"name\"] = constituencies_2024.name.values\n",
    "\n",
    "constituency_results = pd.melt(constituency_wide.reset_index(), id_vars=[\"index\", \"name\"], var_name=\"metric\", value_name=\"estimate\")\n",
    "\n",
    "# Targets: same long layout, labelled with the same constituency codes.\n",
    "constituency_actuals.index = constituencies_2024.code.values\n",
    "constituency_actuals[\"name\"] = constituencies_2024.name.values\n",
    "constituency_actuals_long = pd.melt(constituency_actuals.reset_index(), id_vars=[\"index\", \"name\"], var_name=\"metric\", value_name=\"target\")\n",
    "\n",
    "# One row per (constituency, metric). The name column is kept from the estimates side only,\n",
    "# and validate= fails loudly if a code or metric is duplicated instead of multiplying rows.\n",
    "constituency_target_validation = pd.merge(\n",
    "    constituency_results,\n",
    "    constituency_actuals_long.drop(columns=\"name\"),\n",
    "    on=[\"index\", \"metric\"],\n",
    "    validate=\"one_to_one\",\n",
    ")\n",
    "\n",
    "constituency_target_validation[\"error\"] = constituency_target_validation[\"estimate\"] - constituency_target_validation[\"target\"]\n",
    "constituency_target_validation[\"abs_error\"] = constituency_target_validation[\"error\"].abs()\n",
    "# Divide by |target| so the ratio stays non-negative even if a target is negative.\n",
    "constituency_target_validation[\"rel_abs_error\"] = constituency_target_validation[\"abs_error\"] / constituency_target_validation[\"target\"].abs()"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Calibration check\n", "Looking at the sorted validation results by relative absolute error shows how well our calibrated weights perform against the actual target statistics across UK parliamentary constituencies under the new 2024 boundaries. The table reveals the accuracy of our estimates, from the closest matches to the largest discrepancies, where a lower relative error indicates better calibration performance." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "
indexnamemetricestimatetargeterrorabs_errorrel_abs_error
\n", "\n", "
\n", "Loading ITables v2.2.1 from the init_notebook_mode cell...\n", "(need help?)
\n", "\n" ], "text/plain": [ " index name \\\n", "10276 E14001589 Wirral West \n", "5283 E14001146 Bury St Edmunds and Stowmarket \n", "4154 E14001317 Knowsley \n", "5855 E14001068 Ashfield \n", "5197 W07000110 Vale of Glamorgan \n", "... ... ... \n", "6842 E14001405 North West Norfolk \n", "7791 W07000104 Newport East \n", "7141 W07000104 Newport East \n", "6643 E14001206 Dunstable and Leighton Buzzard \n", "7293 E14001206 Dunstable and Leighton Buzzard \n", "\n", " metric estimate target \\\n", "10276 hmrc/employment_income/amount/20000_30000 1.322333e+08 1.322420e+08 \n", "5283 age/60_70 1.451102e+04 1.450993e+04 \n", "4154 age/40_50 1.269529e+04 1.269403e+04 \n", "5855 age/70_80 1.078475e+04 1.078346e+04 \n", "5197 age/50_60 1.489186e+04 1.489368e+04 \n", "... ... ... ... \n", "6842 hmrc/employment_income/count/12570_15000 8.755749e+02 8.431956e+01 \n", "7791 hmrc/employment_income/amount/12570_15000 1.544117e+07 1.432437e+06 \n", "7141 hmrc/employment_income/count/12570_15000 1.118195e+03 1.031530e+02 \n", "6643 hmrc/employment_income/count/12570_15000 7.247283e+02 6.187779e+01 \n", "7293 hmrc/employment_income/amount/12570_15000 1.006603e+07 8.592676e+05 \n", "\n", " error abs_error rel_abs_error \n", "10276 -8.699413e+03 8.699413e+03 0.000066 \n", "5283 1.083593e+00 1.083593e+00 0.000075 \n", "4154 1.256512e+00 1.256512e+00 0.000099 \n", "5855 1.288091e+00 1.288091e+00 0.000119 \n", "5197 -1.817116e+00 1.817116e+00 0.000122 \n", "... ... ... ... 
\n", "6842 7.912553e+02 7.912553e+02 9.384007 \n", "7791 1.400874e+07 1.400874e+07 9.779649 \n", "7141 1.015042e+03 1.015042e+03 9.840155 \n", "6643 6.628505e+02 6.628505e+02 10.712253 \n", "7293 9.206761e+06 9.206761e+06 10.714661 \n", "\n", "[14300 rows x 8 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "constituency_target_validation.sort_values(\"rel_abs_error\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
    "# National estimate for every calibration target: household weights applied to the target matrix.\n",
    "national_performance = household_weights @ national_target_matrix\n",
    "national_target_validation = pd.DataFrame({\"metric\": national_performance.index, \"estimate\": national_performance.values})\n",
    "# Targets are attached by position.\n",
    "# NOTE(review): assumes national_actuals is ordered like the columns of national_target_matrix - confirm.\n",
    "national_target_validation[\"target\"] = national_actuals.values\n",
    "\n",
    "national_target_validation[\"error\"] = national_target_validation[\"estimate\"] - national_target_validation[\"target\"]\n",
    "national_target_validation[\"abs_error\"] = national_target_validation[\"error\"].abs()\n",
    "# Divide by |target|, not the signed target: some targets are negative (obr/tax_credits is -1e8),\n",
    "# and a signed denominator produced a negative rel_abs_error that sorted as the closest match.\n",
    "national_target_validation[\"rel_abs_error\"] = national_target_validation[\"abs_error\"] / national_target_validation[\"target\"].abs()"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The table below shows the relative absolute error for each calibration target at the **national level**, sorted from the closest matches to the largest discrepancies." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "
metricestimatetargeterrorabs_errorrel_abs_error
\n", "\n", "
\n", "Loading ITables v2.2.1 from the init_notebook_mode cell...\n", "(need help?)
\n", "\n" ], "text/plain": [ " metric estimate \\\n", "32 obr/tax_credits 8.193125e+07 \n", "273 hmrc/property_income_income_band_51_200_000.0_... 1.279169e+09 \n", "302 hmrc/property_income_count_income_band_53_500_... 1.308427e+04 \n", "67 ons/west_midlands_age_0_9 7.012820e+05 \n", "110 ons/south_west_age_70_79 6.602741e+05 \n", ".. ... ... \n", "283 hmrc/state_pension_income_band_52_300_000.0_to... 1.339890e+08 \n", "14 obr/pension_credit_count 1.808012e+06 \n", "38 obr/winter_fuel_allowance 5.880389e+08 \n", "18 obr/winter_fuel_allowance_count 2.447219e+06 \n", "25 obr/domestic_rates 0.000000e+00 \n", "\n", " target error abs_error rel_abs_error \n", "32 -1.000000e+08 1.819312e+08 1.819312e+08 -1.819312 \n", "273 1.278761e+09 4.083231e+05 4.083231e+05 0.000319 \n", "302 1.306200e+04 2.226575e+01 2.226575e+01 0.001705 \n", "67 7.000000e+05 1.281977e+03 1.281977e+03 0.001831 \n", "110 6.620000e+05 -1.725882e+03 1.725882e+03 0.002607 \n", ".. ... ... ... ... \n", "283 2.089727e+08 -7.498374e+07 7.498374e+07 0.358821 \n", "14 1.300000e+06 5.080119e+05 5.080119e+05 0.390778 \n", "38 1.900000e+09 -1.311961e+09 1.311961e+09 0.690506 \n", "18 1.100000e+07 -8.552781e+06 8.552781e+06 0.777526 \n", "25 4.000000e+08 -4.000000e+08 4.000000e+08 1.000000 \n", "\n", "[335 rows x 6 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# National targets ordered by rel_abs_error, smallest value first.\n",
    "national_target_validation.sort_values(by=\"rel_abs_error\", ascending=True)"
 ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 2 }