National dataset validation - PolicyEngine UK data

from policyengine_uk_data import EnhancedFRS_2022_23, FRS_2022_23, SPI_2020_21
from policyengine_uk_data.utils.loss import get_loss_results
import pandas as pd
from itables import init_notebook_mode
import itables.options as opt

# Set the itables `maxBytes` option to 1MB. NOTE(review): presumably this
# raises the size limit above which itables trims large tables, so the full
# results table is shown -- confirm against the itables documentation.
opt.maxBytes = "1MB"

# Turn on itables' notebook mode for every DataFrame (all_interactive=True).
# NOTE(review): presumably this makes DataFrames displayed below render as
# interactive tables -- confirm against the itables documentation.
init_notebook_mode(all_interactive=True)


def get_validation():
    """Collect loss results for every dataset/year pair into one table.

    Calls ``get_loss_results(dataset, year)`` for the FRS and Enhanced FRS
    datasets over the years 2025 to 2028 inclusive. Each result is tagged
    with two extra columns before being stacked:

    * ``time_period`` -- the year passed to ``get_loss_results``.
    * ``dataset`` -- the dataset's ``label`` attribute.

    Returns:
        pandas.DataFrame: all per-dataset, per-year results stacked
        row-wise, with a fresh 0..n-1 index.
    """
    # Gather the pieces in a list and concatenate once at the end. Growing a
    # DataFrame with pd.concat inside the loop recopies every accumulated row
    # on each pass, and seeding it with an empty DataFrame triggers pandas'
    # FutureWarning about empty entries in concat (which can also affect the
    # resulting dtypes in newer pandas versions).
    frames = []
    for dataset in [FRS_2022_23, EnhancedFRS_2022_23]:
        for year in range(2025, 2029):
            loss_results = get_loss_results(dataset, year)
            loss_results["time_period"] = year
            loss_results["dataset"] = dataset.label
            frames.append(loss_results)
    # ignore_index=True discards the per-frame indices, matching the
    # original reset_index(drop=True).
    return pd.concat(frames, ignore_index=True)


df = get_validation()

# Build synthetic "Official" rows from the rows of the first dataset present
# in the table: the estimate is set equal to the target, so every error
# measure recomputed from it is zero wherever the ratio is defined.
# reset_index() (without drop) keeps the old row index as an "index" column.
truth_df = (
    df.loc[df["dataset"] == df["dataset"].unique()[0]]
    .reset_index()
    .assign(
        estimate=lambda t: t["target"],
        error=lambda t: t["estimate"] - t["target"],
        abs_error=lambda t: t["error"].abs(),
        rel_error=lambda t: t["error"] / t["target"],
        abs_rel_error=lambda t: t["rel_error"].abs(),
        dataset="Official",
    )
)

# Append the "Official" rows beneath the dataset rows and renumber.
df = pd.concat([df, truth_df]).reset_index(drop=True)

Calibration check: the table below shows how both the original and enhanced FRS datasets compare to over 2,000 official statistics from the OBR, DWP and HMRC. These are the same statistics that the EFRS was explicitly calibrated to hit.

Since the EFRS is calibrated to these statistics, high performance is expected and achieved.

Full results

# Display the combined table without the leftover "index" column that the
# earlier reset_index() call introduced.
df.drop("index", axis="columns")

Comparisons

# Pair each FRS row with the Enhanced FRS row for the same statistic and
# year; columns present in both halves get "_frs" / "_efrs" suffixes.
merged = df[df["dataset"] == "FRS (2022-23)"].merge(
    df[df["dataset"] == "Enhanced FRS (2022-23)"],
    how="inner",
    on=["time_period", "name"],
    suffixes=("_frs", "_efrs"),
)

# Difference in absolute relative error (EFRS minus FRS): a negative value
# means the Enhanced FRS estimate is closer to the target than the FRS one.
merged = merged.assign(
    rel_error_change_under_efrs=(
        merged["abs_rel_error_efrs"] - merged["abs_rel_error_frs"]
    )
)

# Keep only the comparison columns, placing each FRS figure next to its
# Enhanced FRS counterpart.
merged = merged.loc[
    :,
    [
        "name",
        "time_period",
        "target_frs",
        "estimate_frs",
        "estimate_efrs",
        "error_frs",
        "error_efrs",
        "abs_error_frs",
        "abs_error_efrs",
        "rel_error_frs",
        "rel_error_efrs",
        "abs_rel_error_frs",
        "abs_rel_error_efrs",
        "rel_error_change_under_efrs",
    ],
]
merged