Validation against TAXSIM

Validation tests are run on every new version against TAXSIM35, developed by NBER. We primarily test that, for tax units in the Current Population Survey, the tax liabilities computed by OpenFisca-US are close to those computed by TAXSIM (‘close’ defined as within $100).

from openfisca_us.api.microsimulation import Microsimulation
from openfisca_us.data.datasets import CPS
from openfisca_us.tools.dev.taxsim.generate_taxsim_tests import TaxSim35
import numpy as np
import pandas as pd

# Disable warnings
import warnings

warnings.filterwarnings("ignore")

# State codes whose state income tax is compared against TAXSIM below.
# NOTE(review): "MA" appears twice, so that state is computed and reported
# twice in the results table — confirm whether a different state was intended.
STATES = ["MA", "MA", "NY", "WA"]
# Largest absolute difference between the two models' tax values that still
# counts as "close" (presumably dollars, matching the "$100" in the page text).
DISTANCE = 100
# NOTE(review): not referenced anywhere in the visible code — presumably a
# pass threshold consumed elsewhere; confirm or remove.
MINIMUM_PERCENT_CLOSE = 0.7

taxsim = TaxSim35()
sim = Microsimulation()

# TAXSIM-side results, indexed by "taxsim_taxsimid" so that the comparison
# functions below can align them by index with OpenFisca-US values keyed on
# tax_unit_id.
# NOTE(review): the meaning of the positional arguments (2022, None, True,
# False) is not visible here — check generate_from_microsimulation's signature.
taxsim_df = taxsim.generate_from_microsimulation(
    CPS, 2022, None, True, False
).set_index("taxsim_taxsimid")


def get_federal_tax_against_taxsim_results():
    """Return the share of tax units whose federal tax is close to TAXSIM's.

    OpenFisca-US ``income_tax`` is lined up, by tax unit id, with the
    ``taxsim_fiitax`` column of the module-level ``taxsim_df``. A tax unit
    counts as close when the absolute difference between the two values is
    strictly below ``DISTANCE``.

    Returns:
        The mean of the per-unit "is close" flags, i.e. a fraction in [0, 1].
        A unit with a missing value on either side compares as not close.
    """
    openfisca_tax = sim.calc("income_tax")
    unit_ids = sim.calc("tax_unit_id").values
    openfisca_tax.index = unit_ids

    # Start from an empty frame keyed on tax unit id; each column assignment
    # then aligns the incoming series to that index.
    side_by_side = pd.DataFrame(index=unit_ids)
    side_by_side["openfisca_us"] = openfisca_tax
    side_by_side["taxsim"] = taxsim_df.taxsim_fiitax

    absolute_gap = np.absolute(
        side_by_side["openfisca_us"] - side_by_side["taxsim"]
    )
    is_close = absolute_gap < DISTANCE
    return is_close.mean()


def get_state_income_tax_against_taxsim_results(state: str):
    """Return the share of a state's tax units whose state tax is close to TAXSIM's.

    OpenFisca-US ``state_income_tax`` is lined up, by tax unit id, with the
    ``taxsim_siitax`` column of the module-level ``taxsim_df``. Only tax units
    whose ``tax_unit_state`` equals ``state`` are kept. A unit counts as close
    when the absolute difference is strictly below ``DISTANCE``.

    Args:
        state: State code compared for equality against ``tax_unit_state``.

    Returns:
        The mean of the per-unit "is close" flags for the selected state.
        A unit with a missing value on either side compares as not close.
    """
    state_mask = sim.calc("tax_unit_state").values == state
    openfisca_tax = sim.calc("state_income_tax")
    unit_ids = sim.calc("tax_unit_id").values
    openfisca_tax.index = unit_ids

    side_by_side = pd.DataFrame(
        {
            "openfisca_us": openfisca_tax,
            "taxsim": taxsim_df.taxsim_siitax,
        },
        index=unit_ids,
    )
    # The mask is positional: it is in the same tax-unit order as the frame.
    in_state_rows = side_by_side[state_mask]

    absolute_gap = np.absolute(
        in_state_rows["openfisca_us"] - in_state_rows["taxsim"]
    )
    is_close = absolute_gap < DISTANCE
    return is_close.mean()


# Summary table: one row for the federal comparison, followed by one row per
# entry in STATES (in list order), holding the share of tax units that are
# close to TAXSIM.
results_df = pd.DataFrame(
    {
        "Region": ["Federal", *STATES],
        "Percent close": [
            get_federal_tax_against_taxsim_results(),
            *map(get_state_income_tax_against_taxsim_results, STATES),
        ],
    }
)
# Render each fraction as a percentage string with one decimal place.
results_df["Percent close"] = results_df["Percent close"].map(
    "{:.1%}".format
)
# The re-indexed frame is not assigned; as the final expression it is only
# displayed (presumably as notebook cell output).
results_df.set_index("Region")
| Region  | Percent close |
|---------|---------------|
| Federal | 88.8%         |
| MA      | 81.1%         |
| MA      | 81.1%         |
| NY      | 77.6%         |
| WA      | 91.7%         |