Simulation interface¶
The `Simulation` class is the core interface of this package. You can initialise it by passing in a dictionary that matches the `SimulationOptions` schema, and then use its `calculate` methods to ask it questions.
Some of the options are straightforward and some are more complex. The straightforward ones are:
- `country`: `uk` or `us`.
- `scope`: `macro` (simulating over large data to represent e.g. a country) or `household` (simulating over specific households you describe).
- `time_period`: the year to simulate.
The next important features are:
- `reform`: the policy to use in the reform scenario if we are comparing against a different scenario.
- `baseline`: the policy to use in the baseline scenario if we are comparing against a different baseline scenario.
- `data`: either a household (if `scope` is `household`) or a large dataset name (if `scope` is `macro`).
from policyengine import Simulation
sim = Simulation(
country="uk",
scope="macro",
reform={},
time_period=2025,
)
sim.calculate_economy_comparison()
/opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from .autonotebook import tqdm as notebook_tqdm
No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2022_23.h5
Downloading enhanced_frs_2022_23.h5 from bucket policyengine-uk-data-private
INFO:root:Using Google Cloud Storage for download.
---------------------------------------------------------------------------
DefaultCredentialsError Traceback (most recent call last)
Cell In[1], line 3
1 from policyengine import Simulation
----> 3 sim = Simulation(
4 country="uk",
5 scope="macro",
6 reform={},
7 time_period=2025,
8 )
10 sim.calculate_economy_comparison()
File ~/work/policyengine.py/policyengine.py/policyengine/simulation.py:109, in Simulation.__init__(self, **options)
105 if not isinstance(self.options.data, dict) and not isinstance(
106 self.options.data, Dataset
107 ):
108 logging.debug("Loading data")
--> 109 self._set_data(self.options.data)
110 logging.info("Data loaded")
111 self._initialise_simulations()
File ~/work/policyengine.py/policyengine.py/policyengine/simulation.py:171, in Simulation._set_data(self, file_address)
166 version = None
168 else:
169 # All official PolicyEngine datasets are stored in GCS;
170 # load accordingly
--> 171 filename, version = self._set_data_from_gs(file_address)
172 self.data_version = version
174 time_period = self._set_data_time_period(file_address)
File ~/work/policyengine.py/policyengine.py/policyengine/simulation.py:405, in Simulation._set_data_from_gs(self, file_address)
401 version = self.options.data_version
403 print(f"Downloading {filename} from bucket {bucket}", file=sys.stderr)
--> 405 filepath, version = download(
406 filepath=filename,
407 gcs_bucket=bucket,
408 version=version,
409 return_version=True,
410 )
412 return filename, version
File ~/work/policyengine.py/policyengine.py/policyengine/utils/data_download.py:17, in download(filepath, gcs_bucket, version, return_version)
10 def download(
11 filepath: str,
12 gcs_bucket: str,
13 version: Optional[str] = None,
14 return_version: bool = False,
15 ) -> Tuple[str, str] | str:
16 logging.info("Using Google Cloud Storage for download.")
---> 17 downloaded_version = download_file_from_gcs(
18 bucket_name=gcs_bucket,
19 file_name=filepath,
20 destination_path=filepath,
21 version=version,
22 )
23 if return_version:
24 return filepath, downloaded_version
File ~/work/policyengine.py/policyengine.py/policyengine/utils/google_cloud_bucket.py:41, in download_file_from_gcs(bucket_name, file_name, destination_path, version)
23 def download_file_from_gcs(
24 bucket_name: str,
25 file_name: str,
26 destination_path: str,
27 version: Optional[str] = None,
28 ) -> str | None:
29 """
30 Download a file from Google Cloud Storage to a local path.
31
(...)
38 version (str): The version of the file that was downloaded, if available.
39 """
---> 41 version = _get_client().download(
42 bucket_name,
43 file_name,
44 Path(destination_path),
45 version=version,
46 return_version=True,
47 )
48 return version
File ~/work/policyengine.py/policyengine.py/policyengine/utils/google_cloud_bucket.py:14, in _get_client()
12 if _caching_client is not None:
13 return _caching_client
---> 14 _caching_client = CachingGoogleStorageClient()
15 return _caching_client
File ~/work/policyengine.py/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:19, in CachingGoogleStorageClient.__init__(self)
18 def __init__(self):
---> 19 self.client = SimplifiedGoogleStorageClient()
20 self.cache = diskcache.Cache()
File ~/work/policyengine.py/policyengine.py/policyengine/utils/data/simplified_google_storage_client.py:19, in SimplifiedGoogleStorageClient.__init__(self)
18 def __init__(self):
---> 19 self.client = Client()
File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/storage/client.py:247, in Client.__init__(self, project, credentials, _http, client_info, client_options, use_auth_w_custom_endpoint, extra_headers, api_key)
244 no_project = True
245 project = "<none>"
--> 247 super(Client, self).__init__(
248 project=project,
249 credentials=credentials,
250 client_options=client_options,
251 _http=_http,
252 )
254 # Validate that the universe domain of the credentials matches the
255 # universe domain of the client.
256 if self._credentials.universe_domain != self.universe_domain:
File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/client/__init__.py:338, in ClientWithProject.__init__(self, project, credentials, client_options, _http)
337 def __init__(self, project=None, credentials=None, client_options=None, _http=None):
--> 338 _ClientProjectMixin.__init__(self, project=project, credentials=credentials)
339 Client.__init__(
340 self, credentials=credentials, client_options=client_options, _http=_http
341 )
File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/client/__init__.py:286, in _ClientProjectMixin.__init__(self, project, credentials)
283 project = getattr(credentials, "project_id", None)
285 if project is None:
--> 286 project = self._determine_default(project)
288 if project is None:
289 raise EnvironmentError(
290 "Project was not passed and could not be "
291 "determined from the environment."
292 )
File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/client/__init__.py:305, in _ClientProjectMixin._determine_default(project)
302 @staticmethod
303 def _determine_default(project):
304 """Helper: use default project detection."""
--> 305 return _determine_default_project(project)
File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/_helpers/__init__.py:152, in _determine_default_project(project)
140 """Determine default project ID explicitly or implicitly as fall-back.
141
142 See :func:`google.auth.default` for details on how the default project
(...)
149 :returns: Default project if it can be determined.
150 """
151 if project is None:
--> 152 _, project = google.auth.default()
153 return project
File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/auth/_default.py:685, in default(scopes, request, quota_project_id, default_scopes)
677 _LOGGER.warning(
678 "No project ID could be determined. Consider running "
679 "`gcloud config set project` or setting the %s "
680 "environment variable",
681 environment_vars.PROJECT,
682 )
683 return credentials, effective_project_id
--> 685 raise exceptions.DefaultCredentialsError(_CLOUD_SDK_MISSING_CREDENTIALS)
DefaultCredentialsError: Your default credentials were not found. To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information.
Providing `baseline` and `reform` policies¶
The `baseline` and `reform` policies are dictionaries that represent the policy to simulate. You don’t have to provide a reform policy (if you don’t, the simulation will just simulate the baseline policy). You also don’t have to provide a baseline policy (if you don’t, the simulation will just compare your reform scenario against current law).
If you do provide them, they should each follow this syntax:
{
"gov.hmrc.income_tax.rate": { // Parameter address, in the country model's `parameters/` folder
"2025": 0.2 // Value to set the parameter to in the year 2025
}
}
You can also use this shorthand to set parameters for all years:
{
"gov.hmrc.income_tax.rate": 0.2
}
Providing `data`¶
If you set `scope` to `macro`, you should provide either:
- A Google Cloud `.h5` dataset address in this format: `"gcs://policyengine-us-data/cps_2023.h5"` (`gcs://bucket/path.h5`).
- An instance of `policyengine_core.data.Dataset` (advanced).
See `policyengine.constants` for the available datasets.
If you set `scope` to `household`, you should provide a dictionary that represents a household. This should look like:
{
"people": { // Entity group
"person": { // Entity name
"age": { // Variable (in the country model's `variables/` folder)
"2025": 30, // Time period and value
}
}
},
"households": {
"household": {
"members": ["person"], // Group entities need a `members` field
"region": {
"2025": "LONDON",
}
}
}
}
See the country model’s repository for more information on what entity types are available.
Module documentation¶
Simulate tax-benefit policy and derive society-level output statistics.
- class policyengine.simulation.Simulation(**options: SimulationOptions)¶
Simulate tax-benefit policy and derive society-level output statistics.
- baseline_simulation: Simulation¶
The baseline tax-benefit simulation.
- check_data_version() → None¶
Check the data versions of the simulation against the current data versions.
- check_model_version() → None¶
Check the package versions of the simulation against the current package versions.
- data_version: str | None = None¶
The version of the data used in the simulation.
- is_comparison: bool¶
Whether the simulation is a comparison between two scenarios.
- reform_simulation: Simulation | None = None¶
The reform tax-benefit simulation.
- pydantic model policyengine.simulation.SimulationOptions¶
Show JSON schema
{ "title": "SimulationOptions", "type": "object", "properties": { "country": { "description": "The country to simulate.", "enum": [ "uk", "us" ], "title": "Country", "type": "string" }, "scope": { "description": "The scope of the simulation.", "enum": [ "household", "macro" ], "title": "Scope", "type": "string" }, "data": { "anyOf": [ { "type": "string" }, { "additionalProperties": true, "type": "object" }, {}, { "type": "null" } ], "default": null, "description": "The data to simulate.", "title": "Data" }, "time_period": { "default": 2025, "description": "The time period to simulate.", "title": "Time Period", "type": "integer" }, "reform": { "anyOf": [ { "$ref": "#/$defs/ParametricReform" }, {}, { "type": "null" } ], "default": null, "description": "The reform to simulate.", "title": "Reform" }, "baseline": { "anyOf": [ { "$ref": "#/$defs/ParametricReform" }, {}, { "type": "null" } ], "default": null, "description": "The baseline to simulate.", "title": "Baseline" }, "region": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "The region to simulate within the country.", "title": "Region" }, "subsample": { "anyOf": [ { "type": "integer" }, { "type": "null" } ], "default": null, "description": "How many, if a subsample, households to randomly simulate.", "title": "Subsample" }, "title": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": "[Analysis title]", "description": "The title of the analysis (for charts). If not provided, a default title will be generated.", "title": "Title" }, "include_cliffs": { "anyOf": [ { "type": "boolean" }, { "type": "null" } ], "default": false, "description": "Whether to include tax-benefit cliffs in the simulation analyses. If True, cliffs will be included.", "title": "Include Cliffs" }, "model_version": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "The version of the country model used in the simulation. 
If not provided, the current package version will be used. If provided, this package will throw an error if the package version does not match. Use this as an extra safety check.", "title": "Model Version" }, "data_version": { "anyOf": [ { "type": "string" }, { "type": "null" } ], "default": null, "description": "The version of the data used in the simulation. If not provided, the current data version will be used. If provided, this package will throw an error if the data version does not match. Use this as an extra safety check.", "title": "Data Version" } }, "$defs": { "ParameterChangeDict": { "additionalProperties": { "$ref": "#/$defs/ParameterChangeValue" }, "description": "A dict of changes to a parameter, with custom date string as keys\nand various possible value types.\n\nKeys can be formatted one of two ways:\n1. A single year (e.g., \"YYYY\")\n2. A date range (e.g., \"YYYY-MM-DD.YYYY-MM-DD\")", "title": "ParameterChangeDict", "type": "object" }, "ParameterChangeValue": { "description": "A value for a parameter change, which can be any primitive type or 'Infinity'/'-Infinity'", "title": "ParameterChangeValue" }, "ParametricReform": { "additionalProperties": { "anyOf": [ { "$ref": "#/$defs/ParameterChangeValue" }, { "$ref": "#/$defs/ParameterChangeDict" } ] }, "description": "A reform that just changes parameter values.\n\nThis is a dict that equates a parameter name to either a single value or a dict of changes.", "title": "ParametricReform", "type": "object" } }, "required": [ "country", "scope" ] }
- Config:
arbitrary_types_allowed: bool = True
- Fields:
- field baseline: ParametricReform | Type[Reform] | None = None¶
The baseline to simulate.
- field country: Literal['uk', 'us'] [Required]¶
The country to simulate.
- field data: str | dict[Any, Any] | Any | None = None¶
The data to simulate.
- field data_version: str | None = None¶
The version of the data used in the simulation. If not provided, the current data version will be used. If provided, this package will throw an error if the data version does not match. Use this as an extra safety check.
- field include_cliffs: bool | None = False¶
Whether to include tax-benefit cliffs in the simulation analyses. If True, cliffs will be included.
- field model_version: str | None = None¶
The version of the country model used in the simulation. If not provided, the current package version will be used. If provided, this package will throw an error if the package version does not match. Use this as an extra safety check.
- field reform: ParametricReform | Type[Reform] | None = None¶
The reform to simulate.
- field region: str | None = None¶
The region to simulate within the country.
- field scope: Literal['household', 'macro'] [Required]¶
The scope of the simulation.
- field subsample: int | None = None¶
How many, if a subsample, households to randomly simulate.
- field time_period: int = 2025¶
The time period to simulate.
- field title: str | None = '[Analysis title]'¶
The title of the analysis (for charts). If not provided, a default title will be generated.