Validation against TAXSIM
Validation tests against TAXSIM35, developed by NBER, are run on every new version. We primarily test that, for tax units in the Current Population Survey, the tax liabilities computed by PolicyEngine US are close to those computed by TAXSIM (‘close’ defined as within $100).
Show code cell source
from policyengine_us import Microsimulation
from policyengine_us.data.datasets import CPS
from policyengine_us.tools.dev.taxsim.generate_taxsim_tests import TaxSim35
import numpy as np
import pandas as pd
# Disable warnings
import warnings
warnings.filterwarnings("ignore")
# State abbreviations whose state income tax is compared against TAXSIM.
# Each state is listed once: a repeated entry only recomputes the same
# comparison and adds a duplicate row to the results table.
# NOTE(review): "MA" was listed twice here; if a fourth state was intended in
# place of the duplicate, add it to this list.
STATES = ["MA", "NY", "WA"]
# Largest absolute dollar difference that still counts as "close".
DISTANCE = 100

taxsim = TaxSim35()
sim = Microsimulation()
# TAXSIM output generated from the CPS for 2022, indexed by the TAXSIM record
# id so it can be aligned with PolicyEngine tax unit ids.
# NOTE(review): the meaning of the positional (None, True, False) arguments
# is not visible here; confirm against TaxSim35.generate_from_microsimulation.
taxsim_df = taxsim.generate_from_microsimulation(
    CPS, 2022, None, True, False
).set_index("taxsim_taxsimid")
def get_federal_tax_against_taxsim_results():
    """Return the share of tax units with federal tax close to TAXSIM.

    PolicyEngine's ``income_tax`` and TAXSIM's ``taxsim_fiitax`` are aligned
    on tax unit id. A tax unit counts as close when the absolute difference
    between the two is strictly below ``DISTANCE``.

    Returns:
        The fraction of rows that are close. Rows with no matching TAXSIM
        value compare as not close, because a NaN difference is never
        below the threshold.
    """
    unit_ids = sim.calc("tax_unit_id").values
    pe_tax = sim.calc("income_tax")
    # Re-key by tax unit id so the column assignments below align on it.
    pe_tax.index = unit_ids

    paired = pd.DataFrame(index=unit_ids)
    paired["policyengine_us"] = pe_tax
    paired["taxsim"] = taxsim_df.taxsim_fiitax

    dollar_gap = paired["policyengine_us"].sub(paired["taxsim"]).abs()
    return dollar_gap.lt(DISTANCE).mean()
def get_state_income_tax_against_taxsim_results(state: str):
    """Return the share of a state's tax units with state tax close to TAXSIM.

    PolicyEngine's ``state_income_tax`` and TAXSIM's ``taxsim_siitax`` are
    aligned on tax unit id, then restricted to tax units whose
    ``tax_unit_state`` equals ``state``.

    Args:
        state: Value compared against ``tax_unit_state`` (e.g. ``"NY"``).

    Returns:
        The fraction of the state's rows whose absolute difference is
        strictly below ``DISTANCE``. Rows with no matching TAXSIM value
        compare as not close.
    """
    unit_ids = sim.calc("tax_unit_id").values
    state_mask = sim.calc("tax_unit_state").values == state
    pe_tax = sim.calc("state_income_tax")
    # Re-key by tax unit id so both columns align on the same index.
    pe_tax.index = unit_ids

    paired = pd.DataFrame(
        {"policyengine_us": pe_tax, "taxsim": taxsim_df.taxsim_siitax},
        index=unit_ids,
    ).loc[state_mask]

    dollar_gap = (paired["policyengine_us"] - paired["taxsim"]).abs()
    return dollar_gap.lt(DISTANCE).mean()
# One row per region: the federal comparison first, then each state in STATES.
regions = ["Federal"] + STATES
close_shares = [get_federal_tax_against_taxsim_results()]
close_shares.extend(
    get_state_income_tax_against_taxsim_results(state) for state in STATES
)
results_df = pd.DataFrame({"Region": regions, "Percent close": close_shares})
# Render each share as a percentage with one decimal place.
results_df["Percent close"] = results_df["Percent close"].map("{:.1%}".format)
# Final bare expression so the notebook displays the table indexed by region.
results_df.set_index("Region")
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
/tmp/ipykernel_4167/2493350769.py in <cell line: 0>()
1 from policyengine_us import Microsimulation
----> 2 from policyengine_us.data.datasets import CPS
3 from policyengine_us.tools.dev.taxsim.generate_taxsim_tests import TaxSim35
4 import numpy as np
5 import pandas as pd
ImportError: cannot import name 'CPS' from 'policyengine_us.data.datasets' (/home/runner/work/policyengine-us/policyengine-us/policyengine_us/data/datasets/__init__.py)