Validation against TAXSIM

Validation tests against TAXSIM35, developed by NBER, are carried out on every new version update. We primarily test that, for tax units in the Current Population Survey, the tax liabilities computed by PolicyEngine US are close to those computed by TAXSIM (‘close’ is defined as within $100).

Hide code cell source
from policyengine_us import Microsimulation
from policyengine_us.data.datasets import CPS
from policyengine_us.tools.dev.taxsim.generate_taxsim_tests import TaxSim35
import numpy as np
import pandas as pd

# Silence every warning raised from this point on.
import warnings

warnings.filterwarnings("ignore")

# States whose state income tax is compared against TAXSIM. Each state is
# listed once: a repeated entry recomputes the same comparison and adds a
# duplicate row to the results table.
# NOTE(review): "MA" used to appear twice here; if the second entry was a
# typo for a different state, add that state to the list.
STATES = ["MA", "NY", "WA"]
# Largest absolute gap, in dollars, at which two liabilities count as "close".
DISTANCE = 100

# The two engines being compared: the TAXSIM35 wrapper and the PolicyEngine US
# microsimulation.
taxsim = TaxSim35()
sim = Microsimulation()

# Produce TAXSIM results from the CPS dataset.
# NOTE(review): 2022 is presumably the simulation year; the meaning of the
# remaining positional arguments (None, True, False) is not visible here —
# confirm against generate_from_microsimulation's signature.
taxsim_output = taxsim.generate_from_microsimulation(
    CPS, 2022, None, True, False
)
# Index by the TAXSIM record id; the comparison functions below align these
# rows with PolicyEngine's tax_unit_id values through this index.
taxsim_df = taxsim_output.set_index("taxsim_taxsimid")


def get_federal_tax_against_taxsim_results():
    """Return the share of tax units whose federal income tax is close to TAXSIM's.

    PolicyEngine's ``income_tax`` is paired with the ``taxsim_fiitax`` column
    of the module-level ``taxsim_df`` by tax unit id. A tax unit counts as
    close when the absolute gap between the two is strictly below
    ``DISTANCE``.

    Returns:
        The unweighted fraction of tax units that are close. A tax unit with
        no matching TAXSIM row has a NaN gap and therefore counts as not
        close.
    """
    pe_tax = sim.calc("income_tax")
    unit_ids = sim.calc("tax_unit_id").values
    pe_tax.index = unit_ids

    # Column assignment aligns on the index, so each row pairs the two
    # engines' values for the same tax unit id.
    paired = pd.DataFrame(index=unit_ids).assign(
        policyengine_us=pe_tax,
        taxsim=taxsim_df["taxsim_fiitax"],
    )
    absolute_gap = (paired["policyengine_us"] - paired["taxsim"]).abs()
    return (absolute_gap < DISTANCE).mean()


def get_state_income_tax_against_taxsim_results(state: str):
    """Return the share of a state's tax units whose state income tax is close to TAXSIM's.

    PolicyEngine's ``state_income_tax`` is paired with the ``taxsim_siitax``
    column of the module-level ``taxsim_df`` by tax unit id, then restricted
    to tax units whose ``tax_unit_state`` equals ``state``. A tax unit counts
    as close when the absolute gap is strictly below ``DISTANCE``.

    Args:
        state: Value compared for equality against ``tax_unit_state``
            (the callers in this file pass two-letter codes such as "NY").

    Returns:
        The unweighted fraction of the state's tax units that are close. A
        tax unit with no matching TAXSIM row has a NaN gap and therefore
        counts as not close.
    """
    state_mask = sim.calc("tax_unit_state").values == state
    pe_tax = sim.calc("state_income_tax")
    unit_ids = sim.calc("tax_unit_id").values
    pe_tax.index = unit_ids

    # Both columns are reindexed to the tax unit ids, so each row pairs the
    # two engines' values for the same tax unit.
    paired = pd.DataFrame(
        {
            "policyengine_us": pe_tax,
            "taxsim": taxsim_df["taxsim_siitax"],
        },
        index=unit_ids,
    )
    state_rows = paired[state_mask]
    absolute_gap = (state_rows["policyengine_us"] - state_rows["taxsim"]).abs()
    return (absolute_gap < DISTANCE).mean()


# One row per region: the federal comparison first, then each state in STATES.
regions = ["Federal", *STATES]
close_shares = [get_federal_tax_against_taxsim_results()]
close_shares.extend(
    get_state_income_tax_against_taxsim_results(state) for state in STATES
)

results_df = pd.DataFrame({"Region": regions, "Percent close": close_shares})
# Render each share as a percentage with one decimal place.
results_df["Percent close"] = results_df["Percent close"].map(
    lambda x: f"{x:.1%}"
)
# Bare expression: displayed as the notebook cell's output; results_df itself
# keeps its default integer index.
results_df.set_index("Region")
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
Cell In[1], line 2
      1 from policyengine_us import Microsimulation
----> 2 from policyengine_us.data.datasets import CPS
      3 from policyengine_us.tools.dev.taxsim.generate_taxsim_tests import TaxSim35
      4 import numpy as np

ImportError: cannot import name 'CPS' from 'policyengine_us.data.datasets' (/home/runner/work/policyengine-us/policyengine-us/policyengine_us/data/datasets/__init__.py)