# Writing datasets
A common use case for PolicyEngine Core country models is simulating not just a few households but thousands, by running a microsimulation on survey data. This technique can be used to simulate the impact of a policy on a population, or to compare the impact of different policies on the same population. To do so, we need to load data into PolicyEngine Core, which is done in a standardised format via the `Dataset` class.
## Example
Here’s the Country Template’s default example for a dataset.
from policyengine_core.country_template.constants import COUNTRY_DIR
from policyengine_core.data import Dataset
from policyengine_core.periods import ETERNITY, MONTH, period
class CountryTemplateDataset(Dataset):
    # Metadata identifying the dataset and the file it is stored in.
    name = "country_template_dataset"
    label = "Country template dataset"
    file_path = COUNTRY_DIR / "data" / "storage" / "country_template_dataset.h5"
    data_format = Dataset.TIME_PERIOD_ARRAYS

    def generate(self) -> None:
        """Build the example arrays in memory and pass them to ``save_dataset``."""
        # Periods that the time-dependent variables below are keyed by.
        january_2022 = period("2022-01")
        year_2022 = period("2022")

        # Values stored under the ETERNITY period: three person records
        # and two household records.
        time_invariant = {
            "person_id": [0, 1, 2],
            "household_id": [0, 1],
            "person_household_id": [0, 0, 1],
            "person_household_role": ["parent", "child", "parent"],
        }

        # Every variable maps a period to its array of values.
        arrays = {
            variable: {ETERNITY: values}
            for variable, values in time_invariant.items()
        }
        arrays["salary"] = {january_2022: [100, 0, 200]}
        arrays["household_weight"] = {year_2022: [1e6, 1.2e6]}

        self.save_dataset(arrays)
from policyengine_core.country_template import Simulation
# Generate the dataset; generate() finishes by calling save_dataset on the data it built.
CountryTemplateDataset().generate()
# Create a simulation, passing the dataset class as its data source.
simulation = Simulation(dataset=CountryTemplateDataset)
# Calculate the "salary" variable for the period 2022-01.
simulation.calculate("salary", "2022-01")
array([100., 0., 200.], dtype=float32)