# Constituency dataset validation

from policyengine_uk import Microsimulation
import pandas as pd
import h5py
from itables import init_notebook_mode
import itables.options as opt
from pathlib import Path
from policyengine_uk_data.storage import STORAGE_FOLDER

# Configure itables so DataFrames render as interactive tables in the notebook.
opt.maxBytes = "1MB"
init_notebook_mode(all_interactive=True)

# Repository root, one level above the notebook's working directory.
REPO = Path(".").resolve().parent

# Calibrated constituency-by-household weight matrix for 2025.
weights_file_path = STORAGE_FOLDER / "parliamentary_constituency_weights.h5"
with h5py.File(weights_file_path, "r") as f:
    weights = f["2025"][...]

# Constituency metadata (codes and names) under the 2024 boundaries.
constituencies = pd.read_csv(STORAGE_FOLDER / "constituencies_2024.csv")

# Baseline microsimulation and its national household weights for 2025.
baseline = Microsimulation()
household_weights = baseline.calculate("household_weight", 2025).values

from policyengine_uk_data.datasets.frs.local_areas.constituencies.loss import (
    create_constituency_target_matrix,
    create_national_target_matrix,
)
from policyengine_uk_data.datasets import EnhancedFRS_2022_23

# Household-level target matrices and the official statistics ("actuals")
# they were calibrated against, for 2025.
constituency_target_matrix, constituency_actuals, _ = create_constituency_target_matrix(
    EnhancedFRS_2022_23, 2025, None
)
national_target_matrix, national_actuals = create_national_target_matrix(
    EnhancedFRS_2022_23, 2025, None
)

# Weighted estimates: weights (constituency x household) times the target
# matrix (household x metric) gives one row per constituency, one column
# per calibration metric.
constituency_wide = weights @ constituency_target_matrix
constituency_wide.index = constituencies.code.values
constituency_wide["name"] = constituencies.name.values

# Reshape the estimates to long form: one row per (constituency, metric).
constituency_results = pd.melt(
    constituency_wide.reset_index(),
    id_vars=["index", "name"],
    var_name="variable",
    value_name="value",
)

# Align the actual target statistics to the same constituency index and
# reshape them to the same long form.
constituency_actuals.index = constituencies.code.values
constituency_actuals["name"] = constituencies.name.values
constituency_actuals_long = pd.melt(
    constituency_actuals.reset_index(),
    id_vars=["index", "name"],
    var_name="variable",
    value_name="value",
)

# Join estimates to actuals per (constituency, metric). Use accurately named
# suffixes and a name-based rename instead of positionally reassigning
# `.columns`, which would silently mislabel columns if the merge output
# order ever changed.
constituency_target_validation = pd.merge(
    constituency_results,
    constituency_actuals_long,
    on=["index", "variable"],
    suffixes=("_estimate", "_actual"),
)
constituency_target_validation = constituency_target_validation.drop(
    "name_actual", axis=1
).rename(
    columns={
        "name_estimate": "name",
        "variable": "metric",
        "value_estimate": "estimate",
        "value_actual": "target",
    }
)

# Error diagnostics: signed error, absolute error, and absolute error
# relative to the size of the target.
constituency_target_validation["error"] = (
    constituency_target_validation["estimate"]
    - constituency_target_validation["target"]
)
constituency_target_validation["abs_error"] = constituency_target_validation[
    "error"
].abs()
constituency_target_validation["rel_abs_error"] = (
    constituency_target_validation["abs_error"]
    / constituency_target_validation["target"]
)
*(Interactive validation table renders here.)*

Calibration check

Looking at the sorted validation results by relative absolute error shows how well our calibrated weights perform against the actual target statistics across UK parliamentary constituencies under the new 2024 boundaries. The table reveals the accuracy of our estimates, sorted from the largest discrepancies to the closest matches, where a lower relative error indicates better calibration performance.

constituency_target_validation.sort_values("rel_abs_error", ascending=False)
*(Interactive validation table renders here.)*
# National-level check: estimates from the baseline household weights
# against the national calibration targets.
national_performance = household_weights @ national_target_matrix
national_target_validation = pd.DataFrame(
    {
        "metric": national_performance.index,
        "estimate": national_performance.values,
    }
)
national_target_validation["target"] = national_actuals.values

# Signed error, absolute error, and error relative to the target size.
national_errors = (
    national_target_validation["estimate"] - national_target_validation["target"]
)
national_target_validation["error"] = national_errors
national_target_validation["abs_error"] = national_errors.abs()
national_target_validation["rel_abs_error"] = (
    national_errors.abs() / national_target_validation["target"]
)

The table below shows the relative absolute error for each calibration target at the national level, sorted from the closest matches to the largest discrepancies.

national_target_validation.sort_values("rel_abs_error")
*(Interactive validation table renders here.)*