Simulation interface¶

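The Simulation class is the core interface of this package. You initialise it with keyword arguments that match the SimulationOptions schema (if your options are already in a dictionary, unpack it with Simulation(**options)), and then call its calculate methods, such as calculate_economy_comparison, to get results.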

Some of the options are straightforward and some are more complex. The straightforward ones are:

country: uk or us.
scope: macro (simulating over large data to represent e.g. a country) or household (simulating over specific households you describe).
time_period: the year to simulate.

The more complex options are:

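reform: the policy to apply in the reform scenario. Provide it when you want to compare a reform against the baseline; if omitted, only the baseline scenario is simulated.
baseline: the policy to apply in the baseline scenario. Provide it when you want to compare against something other than current law; if omitted, current law is used as the baseline.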
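data: either a dictionary describing a household (if scope is household) or a dataset address or Dataset instance (if scope is macro). If you omit data for a macro simulation, a default dataset is downloaded from Google Cloud Storage, which requires Google Cloud Application Default Credentials to be set up in your environment.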

from policyengine import Simulation

sim = Simulation(
    country="uk",
    scope="macro",
    reform={},
    time_period=2025,
)

sim.calculate_economy_comparison()

/opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2022_23.h5
Downloading enhanced_frs_2022_23.h5 from bucket policyengine-uk-data-private
INFO:root:Using Google Cloud Storage for download.

---------------------------------------------------------------------------
DefaultCredentialsError                   Traceback (most recent call last)
Cell In[1], line 3
from policyengine import Simulation
----> 3 sim = Simulation(
   country="uk",
   scope="macro",
   reform={},
   time_period=2025,
)
sim.calculate_economy_comparison()

File ~/work/policyengine.py/policyengine.py/policyengine/simulation.py:109, in Simulation.__init__(self, **options)
if not isinstance(self.options.data, dict) and not isinstance(
   self.options.data, Dataset
):
   logging.debug("Loading data")
--> 109     self._set_data(self.options.data)
   logging.info("Data loaded")
self._initialise_simulations()

File ~/work/policyengine.py/policyengine.py/policyengine/simulation.py:171, in Simulation._set_data(self, file_address)
   version = None
else:
   # All official PolicyEngine datasets are stored in GCS;
   # load accordingly
--> 171     filename, version = self._set_data_from_gs(file_address)
   self.data_version = version
time_period = self._set_data_time_period(file_address)

File ~/work/policyengine.py/policyengine.py/policyengine/simulation.py:405, in Simulation._set_data_from_gs(self, file_address)
version = self.options.data_version
print(f"Downloading {filename} from bucket {bucket}", file=sys.stderr)
--> 405 filepath, version = download(
   filepath=filename,
   gcs_bucket=bucket,
   version=version,
   return_version=True,
)
return filename, version

File ~/work/policyengine.py/policyengine.py/policyengine/utils/data_download.py:17, in download(filepath, gcs_bucket, version, return_version)
def download(
   filepath: str,
   gcs_bucket: str,
   version: Optional[str] = None,
   return_version: bool = False,
) -> Tuple[str, str] | str:
   logging.info("Using Google Cloud Storage for download.")
---> 17     downloaded_version = download_file_from_gcs(
       bucket_name=gcs_bucket,
       file_name=filepath,
       destination_path=filepath,
       version=version,
   )
   if return_version:
       return filepath, downloaded_version

File ~/work/policyengine.py/policyengine.py/policyengine/utils/google_cloud_bucket.py:41, in download_file_from_gcs(bucket_name, file_name, destination_path, version)
def download_file_from_gcs(
   bucket_name: str,
   file_name: str,
   destination_path: str,
   version: Optional[str] = None,
) -> str | None:
   """
   Download a file from Google Cloud Storage to a local path.

   (...)
       version (str): The version of the file that was downloaded, if available.
   """
---> 41     version = _get_client().download(
       bucket_name,
       file_name,
       Path(destination_path),
       version=version,
       return_version=True,
   )
   return version

File ~/work/policyengine.py/policyengine.py/policyengine/utils/google_cloud_bucket.py:14, in _get_client()
if _caching_client is not None:
   return _caching_client
---> 14 _caching_client = CachingGoogleStorageClient()
return _caching_client

File ~/work/policyengine.py/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:19, in CachingGoogleStorageClient.__init__(self)
def __init__(self):
---> 19     self.client = SimplifiedGoogleStorageClient()
   self.cache = diskcache.Cache()

File ~/work/policyengine.py/policyengine.py/policyengine/utils/data/simplified_google_storage_client.py:19, in SimplifiedGoogleStorageClient.__init__(self)
def __init__(self):
---> 19     self.client = Client()

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/storage/client.py:247, in Client.__init__(self, project, credentials, _http, client_info, client_options, use_auth_w_custom_endpoint, extra_headers, api_key)
           no_project = True
           project = "<none>"
--> 247 super(Client, self).__init__(
   project=project,
   credentials=credentials,
   client_options=client_options,
   _http=_http,
)
# Validate that the universe domain of the credentials matches the
# universe domain of the client.
if self._credentials.universe_domain != self.universe_domain:

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/client/__init__.py:338, in ClientWithProject.__init__(self, project, credentials, client_options, _http)
def __init__(self, project=None, credentials=None, client_options=None, _http=None):
--> 338     _ClientProjectMixin.__init__(self, project=project, credentials=credentials)
   Client.__init__(
       self, credentials=credentials, client_options=client_options, _http=_http
   )

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/client/__init__.py:286, in _ClientProjectMixin.__init__(self, project, credentials)
   project = getattr(credentials, "project_id", None)
if project is None:
--> 286     project = self._determine_default(project)
if project is None:
   raise EnvironmentError(
       "Project was not passed and could not be "
       "determined from the environment."
   )

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/client/__init__.py:305, in _ClientProjectMixin._determine_default(project)
@staticmethod
def _determine_default(project):
   """Helper:  use default project detection."""
--> 305     return _determine_default_project(project)

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/_helpers/__init__.py:152, in _determine_default_project(project)
"""Determine default project ID explicitly or implicitly as fall-back.

See :func:`google.auth.default` for details on how the default project
   (...)
:returns: Default project if it can be determined.
"""
if project is None:
--> 152     _, project = google.auth.default()
return project

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/auth/_default.py:685, in default(scopes, request, quota_project_id, default_scopes)
           _LOGGER.warning(
               "No project ID could be determined. Consider running "
               "`gcloud config set project` or setting the %s "
               "environment variable",
               environment_vars.PROJECT,
           )
       return credentials, effective_project_id
--> 685 raise exceptions.DefaultCredentialsError(_CLOUD_SDK_MISSING_CREDENTIALS)

DefaultCredentialsError: Your default credentials were not found. To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information.

Providing `baseline` and `reform` policies¶

The baseline and reform policies are dictionaries that represent the policy to simulate. You don’t have to provide a reform policy (if you don’t, the simulation will just simulate the baseline policy). You also don’t have to provide a baseline policy (if you don’t, the simulation will just compare your reform scenario against current law).

If you do, they should each follow this syntax:

{
    "gov.hmrc.income_tax.rate": { // Parameter address, in the country model's `parameters/` folder
        "2025": 0.2 // Value to set the parameter to in the year 2025
    }
}

You can also use this shorthand to set parameters for all years:

{
    "gov.hmrc.income_tax.rate": 0.2
}

Providing `data`¶

If you set scope to macro, you should provide either:

A Google Cloud Storage .h5 dataset address in this format: "gs://policyengine-us-data/cps_2023.h5" (gs://bucket/path.h5).
An instance of policyengine_core.data.Dataset (advanced).

See policyengine.constants for the available datasets.

If you set scope to household, you should provide a dictionary that represents a household. This should look like:

{
    "people": { // Entity group
        "person": { // Entity name
            "age": { // Variable (in the country model's `variables/` folder)
                "2025": 30, // Time period and value
            }
        }
    },
    "households": {
        "household": {
            "members": ["person"], // Group entities need a `members` field
            "region": {
                "2025": "LONDON",
            }
        }
    }
}

See the country model’s repository for more information on what entity types are available.

Module documentation¶

Simulate tax-benefit policy and derive society-level output statistics.

class policyengine.simulation.Simulation(**options: SimulationOptions)¶

Simulate tax-benefit policy and derive society-level output statistics.

baseline_simulation: Simulation¶: The baseline tax-benefit simulation.

check_data_version() → None¶: Check the data versions of the simulation against the current data versions.

check_model_version() → None¶: Check the package versions of the simulation against the current package versions.

data_version: str | None = None¶: The version of the data used in the simulation.

is_comparison: bool¶: Whether the simulation is a comparison between two scenarios.

reform_simulation: Simulation | None = None¶: The reform tax-benefit simulation.

pydantic model policyengine.simulation.SimulationOptions¶

Show JSON schema

{
   "title": "SimulationOptions",
   "type": "object",
   "properties": {
      "country": {
         "description": "The country to simulate.",
         "enum": [
            "uk",
            "us"
         ],
         "title": "Country",
         "type": "string"
      },
      "scope": {
         "description": "The scope of the simulation.",
         "enum": [
            "household",
            "macro"
         ],
         "title": "Scope",
         "type": "string"
      },
      "data": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "additionalProperties": true,
               "type": "object"
            },
            {},
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The data to simulate.",
         "title": "Data"
      },
      "time_period": {
         "default": 2025,
         "description": "The time period to simulate.",
         "title": "Time Period",
         "type": "integer"
      },
      "reform": {
         "anyOf": [
            {
               "$ref": "#/$defs/ParametricReform"
            },
            {},
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The reform to simulate.",
         "title": "Reform"
      },
      "baseline": {
         "anyOf": [
            {
               "$ref": "#/$defs/ParametricReform"
            },
            {},
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The baseline to simulate.",
         "title": "Baseline"
      },
      "region": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The region to simulate within the country.",
         "title": "Region"
      },
      "subsample": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "How many, if a subsample, households to randomly simulate.",
         "title": "Subsample"
      },
      "title": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": "[Analysis title]",
         "description": "The title of the analysis (for charts). If not provided, a default title will be generated.",
         "title": "Title"
      },
      "include_cliffs": {
         "anyOf": [
            {
               "type": "boolean"
            },
            {
               "type": "null"
            }
         ],
         "default": false,
         "description": "Whether to include tax-benefit cliffs in the simulation analyses. If True, cliffs will be included.",
         "title": "Include Cliffs"
      },
      "model_version": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The version of the country model used in the simulation. If not provided, the current package version will be used. If provided, this package will throw an error if the package version does not match. Use this as an extra safety check.",
         "title": "Model Version"
      },
      "data_version": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The version of the data used in the simulation. If not provided, the current data version will be used. If provided, this package will throw an error if the data version does not match. Use this as an extra safety check.",
         "title": "Data Version"
      }
   },
   "$defs": {
      "ParameterChangeDict": {
         "additionalProperties": {
            "$ref": "#/$defs/ParameterChangeValue"
         },
         "description": "A dict of changes to a parameter, with custom date string as keys\nand various possible value types.\n\nKeys can be formatted one of two ways:\n1. A single year (e.g., \"YYYY\")\n2. A date range (e.g., \"YYYY-MM-DD.YYYY-MM-DD\")",
         "title": "ParameterChangeDict",
         "type": "object"
      },
      "ParameterChangeValue": {
         "description": "A value for a parameter change, which can be any primitive type or 'Infinity'/'-Infinity'",
         "title": "ParameterChangeValue"
      },
      "ParametricReform": {
         "additionalProperties": {
            "anyOf": [
               {
                  "$ref": "#/$defs/ParameterChangeValue"
               },
               {
                  "$ref": "#/$defs/ParameterChangeDict"
               }
            ]
         },
         "description": "A reform that just changes parameter values.\n\nThis is a dict that equates a parameter name to either a single value or a dict of changes.",
         "title": "ParametricReform",
         "type": "object"
      }
   },
   "required": [
      "country",
      "scope"
   ]
}

Config:

arbitrary_types_allowed: bool = True

Fields:

baseline (policyengine.utils.reforms.ParametricReform | Type[policyengine_core.reforms.reform.Reform] | None)
country (Literal['uk', 'us'])
data (str | dict[Any, Any] | Any | None)
data_version (str | None)
include_cliffs (bool | None)
model_version (str | None)
reform (policyengine.utils.reforms.ParametricReform | Type[policyengine_core.reforms.reform.Reform] | None)
region (str | None)
scope (Literal['household', 'macro'])
subsample (int | None)
time_period (int)
title (str | None)

field baseline: ParametricReform | Type[Reform] | None = None¶: The baseline to simulate.

field country: Literal['uk', 'us'] [Required]¶: The country to simulate.

field data: str | dict[Any, Any] | Any | None = None¶: The data to simulate.

field data_version: str | None = None¶: The version of the data used in the simulation. If not provided, the current data version will be used. If provided, this package will throw an error if the data version does not match. Use this as an extra safety check.

field include_cliffs: bool | None = False¶: Whether to include tax-benefit cliffs in the simulation analyses. If True, cliffs will be included.

field model_version: str | None = None¶: The version of the country model used in the simulation. If not provided, the current package version will be used. If provided, this package will throw an error if the package version does not match. Use this as an extra safety check.

field reform: ParametricReform | Type[Reform] | None = None¶: The reform to simulate.

field region: str | None = None¶: The region to simulate within the country.

field scope: Literal['household', 'macro'] [Required]¶: The scope of the simulation.

field subsample: int | None = None¶: How many, if a subsample, households to randomly simulate.

field time_period: int = 2025¶: The time period to simulate.

field title: str | None = '[Analysis title]'¶: The title of the analysis (for charts). If not provided, a default title will be generated.

Simulation interface¶

Providing baseline and reform policies¶

Providing data¶

Module documentation¶

Providing `baseline` and `reform` policies¶

Providing `data`¶