Simulation interface

The Simulation class is the core interface of this package. You can initialise it by passing in a dictionary that matches the SimulationOptions schema, and then use its calculate methods to ask it questions.

Some of the options are straightforward and some are more complex. The straightforward ones are:

  • country: uk or us.

  • scope: macro (simulating over large data to represent e.g. a country) or household (simulating over specific households you describe).

  • time_period: the year to simulate.

The more complex ones are:

  • reform: the policy to apply in the reform scenario, which is compared against the baseline scenario.

  • baseline: the policy to apply in the baseline scenario, if you want to compare against a baseline other than current law.

  • data: either a household (if scope is household) or a large dataset name (if scope is macro).

from policyengine import Simulation

sim = Simulation(
    country="uk",
    scope="macro",
    reform={},
    time_period=2025,
)

sim.calculate_economy_comparison()
/opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
No data provided, using default dataset: gs://policyengine-uk-data-private/enhanced_frs_2022_23.h5
Downloading enhanced_frs_2022_23.h5 from bucket policyengine-uk-data-private
INFO:root:Using Google Cloud Storage for download.
---------------------------------------------------------------------------
DefaultCredentialsError                   Traceback (most recent call last)
Cell In[1], line 3
      1 from policyengine import Simulation
----> 3 sim = Simulation(
      4     country="uk",
      5     scope="macro",
      6     reform={},
      7     time_period=2025,
      8 )
     10 sim.calculate_economy_comparison()

File ~/work/policyengine.py/policyengine.py/policyengine/simulation.py:109, in Simulation.__init__(self, **options)
    105 if not isinstance(self.options.data, dict) and not isinstance(
    106     self.options.data, Dataset
    107 ):
    108     logging.debug("Loading data")
--> 109     self._set_data(self.options.data)
    110     logging.info("Data loaded")
    111 self._initialise_simulations()

File ~/work/policyengine.py/policyengine.py/policyengine/simulation.py:171, in Simulation._set_data(self, file_address)
    166     version = None
    168 else:
    169     # All official PolicyEngine datasets are stored in GCS;
    170     # load accordingly
--> 171     filename, version = self._set_data_from_gs(file_address)
    172     self.data_version = version
    174 time_period = self._set_data_time_period(file_address)

File ~/work/policyengine.py/policyengine.py/policyengine/simulation.py:405, in Simulation._set_data_from_gs(self, file_address)
    401 version = self.options.data_version
    403 print(f"Downloading {filename} from bucket {bucket}", file=sys.stderr)
--> 405 filepath, version = download(
    406     filepath=filename,
    407     gcs_bucket=bucket,
    408     version=version,
    409     return_version=True,
    410 )
    412 return filename, version

File ~/work/policyengine.py/policyengine.py/policyengine/utils/data_download.py:17, in download(filepath, gcs_bucket, version, return_version)
     10 def download(
     11     filepath: str,
     12     gcs_bucket: str,
     13     version: Optional[str] = None,
     14     return_version: bool = False,
     15 ) -> Tuple[str, str] | str:
     16     logging.info("Using Google Cloud Storage for download.")
---> 17     downloaded_version = download_file_from_gcs(
     18         bucket_name=gcs_bucket,
     19         file_name=filepath,
     20         destination_path=filepath,
     21         version=version,
     22     )
     23     if return_version:
     24         return filepath, downloaded_version

File ~/work/policyengine.py/policyengine.py/policyengine/utils/google_cloud_bucket.py:41, in download_file_from_gcs(bucket_name, file_name, destination_path, version)
     23 def download_file_from_gcs(
     24     bucket_name: str,
     25     file_name: str,
     26     destination_path: str,
     27     version: Optional[str] = None,
     28 ) -> str | None:
     29     """
     30     Download a file from Google Cloud Storage to a local path.
     31 
   (...)
     38         version (str): The version of the file that was downloaded, if available.
     39     """
---> 41     version = _get_client().download(
     42         bucket_name,
     43         file_name,
     44         Path(destination_path),
     45         version=version,
     46         return_version=True,
     47     )
     48     return version

File ~/work/policyengine.py/policyengine.py/policyengine/utils/google_cloud_bucket.py:14, in _get_client()
     12 if _caching_client is not None:
     13     return _caching_client
---> 14 _caching_client = CachingGoogleStorageClient()
     15 return _caching_client

File ~/work/policyengine.py/policyengine.py/policyengine/utils/data/caching_google_storage_client.py:19, in CachingGoogleStorageClient.__init__(self)
     18 def __init__(self):
---> 19     self.client = SimplifiedGoogleStorageClient()
     20     self.cache = diskcache.Cache()

File ~/work/policyengine.py/policyengine.py/policyengine/utils/data/simplified_google_storage_client.py:19, in SimplifiedGoogleStorageClient.__init__(self)
     18 def __init__(self):
---> 19     self.client = Client()

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/storage/client.py:247, in Client.__init__(self, project, credentials, _http, client_info, client_options, use_auth_w_custom_endpoint, extra_headers, api_key)
    244             no_project = True
    245             project = "<none>"
--> 247 super(Client, self).__init__(
    248     project=project,
    249     credentials=credentials,
    250     client_options=client_options,
    251     _http=_http,
    252 )
    254 # Validate that the universe domain of the credentials matches the
    255 # universe domain of the client.
    256 if self._credentials.universe_domain != self.universe_domain:

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/client/__init__.py:338, in ClientWithProject.__init__(self, project, credentials, client_options, _http)
    337 def __init__(self, project=None, credentials=None, client_options=None, _http=None):
--> 338     _ClientProjectMixin.__init__(self, project=project, credentials=credentials)
    339     Client.__init__(
    340         self, credentials=credentials, client_options=client_options, _http=_http
    341     )

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/client/__init__.py:286, in _ClientProjectMixin.__init__(self, project, credentials)
    283     project = getattr(credentials, "project_id", None)
    285 if project is None:
--> 286     project = self._determine_default(project)
    288 if project is None:
    289     raise EnvironmentError(
    290         "Project was not passed and could not be "
    291         "determined from the environment."
    292     )

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/client/__init__.py:305, in _ClientProjectMixin._determine_default(project)
    302 @staticmethod
    303 def _determine_default(project):
    304     """Helper:  use default project detection."""
--> 305     return _determine_default_project(project)

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/cloud/_helpers/__init__.py:152, in _determine_default_project(project)
    140 """Determine default project ID explicitly or implicitly as fall-back.
    141 
    142 See :func:`google.auth.default` for details on how the default project
   (...)
    149 :returns: Default project if it can be determined.
    150 """
    151 if project is None:
--> 152     _, project = google.auth.default()
    153 return project

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/google/auth/_default.py:685, in default(scopes, request, quota_project_id, default_scopes)
    677             _LOGGER.warning(
    678                 "No project ID could be determined. Consider running "
    679                 "`gcloud config set project` or setting the %s "
    680                 "environment variable",
    681                 environment_vars.PROJECT,
    682             )
    683         return credentials, effective_project_id
--> 685 raise exceptions.DefaultCredentialsError(_CLOUD_SDK_MISSING_CREDENTIALS)

DefaultCredentialsError: Your default credentials were not found. To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information.

Providing baseline and reform policies

The baseline and reform policies are dictionaries that represent the policy to simulate. You don’t have to provide a reform policy (if you don’t, the simulation will just simulate the baseline policy). You also don’t have to provide a baseline policy (if you don’t, the simulation will just compare your reform scenario against current law).

If you do, they should each follow this syntax:

{
    "gov.hmrc.income_tax.rate": { // Parameter address, in the country model's `parameters/` folder
        "2025": 0.2 // Value to set the parameter to in the year 2025
    }
}

You can also use this shorthand to set parameters for all years:

{
    "gov.hmrc.income_tax.rate": 0.2
}

Providing data

If you set scope to macro, you should provide either of the following (if you provide no data, a default dataset is used):

  • A Google Cloud Storage .h5 dataset address in this format: "gs://policyengine-us-data/cps_2023.h5" (gs://bucket/path.h5).

  • An instance of policyengine_core.data.Dataset (advanced).

See policyengine.constants for the available datasets.

If you set scope to household, you should provide a dictionary that represents a household. This should look like:

{
    "people": { // Entity group
        "person": { // Entity name
            "age": { // Variable (in the country model's `variables/` folder)
                "2025": 30, // Time period and value
            }
        }
    },
    "households": {
        "household": {
            "members": ["person"], // Group entities need a `members` field
            "region": {
                "2025": "LONDON",
            }
        }
    }
}

See the country model’s repository for more information on what entity types are available.

Module documentation

Simulate tax-benefit policy and derive society-level output statistics.

class policyengine.simulation.Simulation(**options: SimulationOptions)

Simulate tax-benefit policy and derive society-level output statistics.

baseline_simulation: Simulation

The baseline tax-benefit simulation.

check_data_version() → None

Check the data versions of the simulation against the current data versions.

check_model_version() → None

Check the package versions of the simulation against the current package versions.

data_version: str | None = None

The version of the data used in the simulation.

is_comparison: bool

Whether the simulation is a comparison between two scenarios.

reform_simulation: Simulation | None = None

The reform tax-benefit simulation.

pydantic model policyengine.simulation.SimulationOptions

Show JSON schema
{
   "title": "SimulationOptions",
   "type": "object",
   "properties": {
      "country": {
         "description": "The country to simulate.",
         "enum": [
            "uk",
            "us"
         ],
         "title": "Country",
         "type": "string"
      },
      "scope": {
         "description": "The scope of the simulation.",
         "enum": [
            "household",
            "macro"
         ],
         "title": "Scope",
         "type": "string"
      },
      "data": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "additionalProperties": true,
               "type": "object"
            },
            {},
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The data to simulate.",
         "title": "Data"
      },
      "time_period": {
         "default": 2025,
         "description": "The time period to simulate.",
         "title": "Time Period",
         "type": "integer"
      },
      "reform": {
         "anyOf": [
            {
               "$ref": "#/$defs/ParametricReform"
            },
            {},
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The reform to simulate.",
         "title": "Reform"
      },
      "baseline": {
         "anyOf": [
            {
               "$ref": "#/$defs/ParametricReform"
            },
            {},
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The baseline to simulate.",
         "title": "Baseline"
      },
      "region": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The region to simulate within the country.",
         "title": "Region"
      },
      "subsample": {
         "anyOf": [
            {
               "type": "integer"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "How many, if a subsample, households to randomly simulate.",
         "title": "Subsample"
      },
      "title": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": "[Analysis title]",
         "description": "The title of the analysis (for charts). If not provided, a default title will be generated.",
         "title": "Title"
      },
      "include_cliffs": {
         "anyOf": [
            {
               "type": "boolean"
            },
            {
               "type": "null"
            }
         ],
         "default": false,
         "description": "Whether to include tax-benefit cliffs in the simulation analyses. If True, cliffs will be included.",
         "title": "Include Cliffs"
      },
      "model_version": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The version of the country model used in the simulation. If not provided, the current package version will be used. If provided, this package will throw an error if the package version does not match. Use this as an extra safety check.",
         "title": "Model Version"
      },
      "data_version": {
         "anyOf": [
            {
               "type": "string"
            },
            {
               "type": "null"
            }
         ],
         "default": null,
         "description": "The version of the data used in the simulation. If not provided, the current data version will be used. If provided, this package will throw an error if the data version does not match. Use this as an extra safety check.",
         "title": "Data Version"
      }
   },
   "$defs": {
      "ParameterChangeDict": {
         "additionalProperties": {
            "$ref": "#/$defs/ParameterChangeValue"
         },
         "description": "A dict of changes to a parameter, with custom date string as keys\nand various possible value types.\n\nKeys can be formatted one of two ways:\n1. A single year (e.g., \"YYYY\")\n2. A date range (e.g., \"YYYY-MM-DD.YYYY-MM-DD\")",
         "title": "ParameterChangeDict",
         "type": "object"
      },
      "ParameterChangeValue": {
         "description": "A value for a parameter change, which can be any primitive type or 'Infinity'/'-Infinity'",
         "title": "ParameterChangeValue"
      },
      "ParametricReform": {
         "additionalProperties": {
            "anyOf": [
               {
                  "$ref": "#/$defs/ParameterChangeValue"
               },
               {
                  "$ref": "#/$defs/ParameterChangeDict"
               }
            ]
         },
         "description": "A reform that just changes parameter values.\n\nThis is a dict that equates a parameter name to either a single value or a dict of changes.",
         "title": "ParametricReform",
         "type": "object"
      }
   },
   "required": [
      "country",
      "scope"
   ]
}

Config:
  • arbitrary_types_allowed: bool = True

Fields:
field baseline: ParametricReform | Type[Reform] | None = None

The baseline to simulate.

field country: Literal['uk', 'us'] [Required]

The country to simulate.

field data: str | dict[Any, Any] | Any | None = None

The data to simulate.

field data_version: str | None = None

The version of the data used in the simulation. If not provided, the current data version will be used. If provided, this package will throw an error if the data version does not match. Use this as an extra safety check.

field include_cliffs: bool | None = False

Whether to include tax-benefit cliffs in the simulation analyses. If True, cliffs will be included.

field model_version: str | None = None

The version of the country model used in the simulation. If not provided, the current package version will be used. If provided, this package will throw an error if the package version does not match. Use this as an extra safety check.

field reform: ParametricReform | Type[Reform] | None = None

The reform to simulate.

field region: str | None = None

The region to simulate within the country.

field scope: Literal['household', 'macro'] [Required]

The scope of the simulation.

field subsample: int | None = None

The number of households to randomly sample and simulate, if simulating a subsample.

field time_period: int = 2025

The time period to simulate.

field title: str | None = '[Analysis title]'

The title of the analysis (for charts). If not provided, a default title will be generated.