import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import datetime
from rsrc.tests import *
from rsrc.src import fit_and_display, load_compas_data, Helper, LRModel
from aif360.sklearn.preprocessing import Reweighing
from aif360.sklearn.preprocessing import LearnedFairRepresentations
from aif360.sklearn.inprocessing import ExponentiatedGradientReduction

from warnings import filterwarnings, simplefilter
simplefilter(action='ignore', category=FutureWarning)
filterwarnings("ignore", "lbfgs")

WARNING:root:No module named 'tensorflow': AdversarialDebiasing will be unavailable. To install, run:
pip install 'aif360[AdversarialDebiasing]'

WARNING:root:No module named 'inFairness': SenSeI and SenSR will be unavailable. To install, run:
pip install 'aif360[inFairness]'

WARNING:root:No module named 'rpy2': FairAdapt will be unavailable. To install, run:
pip install 'aif360[FairAdapt]'

# Load the COMPAS dataset through the helper imported from rsrc.src
data = load_compas_data()

# Preview the first rows, then list the column names and the number of rows
display(data.head())
print(f"Columns available: {data.columns.tolist()}")
print(f"Total number of cases: {len(data)}")

Columns available: ['id', 'name', 'first', 'last', 'sex', 'dob', 'age', 'age_cat', 'race', 'juv_fel_count', 'decile_score', 'juv_misd_count', 'juv_other_count', 'priors_count', 'c_jail_in', 'c_jail_out', 'c_offense_date', 'c_arrest_date', 'c_charge_degree', 'c_charge_desc', 'is_recid', 'in_custody', 'out_custody']
Total number of cases: 6172

### YOUR CODE HERE
# Flag as predicted recidivist every defendant whose decile score is 5 or more
data['predicted_recid'] = data['decile_score'].ge(5) # SOLUTION

# Accuracy of the COMPAS tool: share of rows where the prediction agrees with the observed 'is_recid' column
accuracy_compas = len(data[data['predicted_recid'] == data['is_recid']]) / len(data) # SOLUTION
### END OF YOUR CODE

# quick check of the value (rounded to a whole percentage) against the expected one
test_values(round(accuracy_compas*100), "accuracy_compas")

print(f"Accuracy of the COMPAS tool: {accuracy_compas*100:.1f}%")

🆗 Tests passed ! =)
Accuracy of the COMPAS tool: 65.9%

# Every distinct value found in the 'race' column
print(f"All races: {list(data['race'].unique())}")

### YOUR CODE HERE
# Rows whose race is "African-American", and how many there are
black_defendants = data.loc[data["race"] == "African-American"] # SOLUTION
black_defendants_number = len(black_defendants) # SOLUTION
print(f"Number of cases for Black defendants:{black_defendants_number}")

# Rows whose race is "Caucasian", and how many there are
white_defendants = data.loc[data["race"] == "Caucasian"] # SOLUTION
white_defendants_number = len(white_defendants) # SOLUTION
print(f"Number of cases for White defendants:{white_defendants_number}")
### END OF YOUR CODE

# The check receives the pair (white count, black count)
test_values((len(white_defendants), len(black_defendants)), "defendants_counts")

All races: ['Other', 'African-American', 'Caucasian', 'Hispanic', 'Asian', 'Native American']
Number of cases for Black defendants:3175
Number of cases for White defendants:2103
🆗 Tests passed ! =)

def fpr(data):
    """
    Return the false positive rate for the data, using is_recid and the decile_score (or predicted_recid)

    Among the rows with ``is_recid == 0``, this is the fraction whose
    ``decile_score`` is 5 or more.

    :param data: DataFrame with the columns ``is_recid`` and ``decile_score``
    :return: FP / N as a float; NaN when ``data`` has no row with ``is_recid == 0``
        (the rate is undefined in that case)
    """
    ### YOUR CODE HERE
    # dataframe with defendants who did not recidivate
    did_not_recidivate = data[data["is_recid"] == 0] # SOLUTION

    # number of defendants who did not recidivate (actual negatives)
    n = did_not_recidivate.shape[0] # SOLUTION

    # number of defendants predicted to recidivate among those who actually did not recidivate (false positives)
    fp = did_not_recidivate[did_not_recidivate["decile_score"] >= 5].shape[0] # SOLUTION

    # false positive rate (NaN instead of a ZeroDivisionError when there is no actual negative)
    result = fp / n if n else float("nan") # SOLUTION
    ### END OF YOUR CODE

    return result


def fnr(data):
    """
    Return the false negative rate for the data, using is_recid and the decile_score (or predicted_recid)

    Among the rows with ``is_recid == 1``, this is the fraction whose
    ``decile_score`` is below 5.

    :param data: DataFrame with the columns ``is_recid`` and ``decile_score``
    :return: FN / P as a float; NaN when ``data`` has no row with ``is_recid == 1``
        (the rate is undefined in that case)
    """
    ### YOUR CODE HERE
    # dataframe with defendants who did recidivate
    recidivated = data[data["is_recid"] == 1] # SOLUTION

    # number of defendants who did recidivate (actual positives)
    p = recidivated.shape[0] # SOLUTION

    # number of defendants predicted NOT to recidivate among those who actually did recidivate (false negatives)
    fn = recidivated[recidivated["decile_score"] < 5].shape[0] # SOLUTION

    # false negative rate (NaN instead of a ZeroDivisionError when there is no actual positive)
    result = fn / p if p else float("nan") # SOLUTION
    ### END OF YOUR CODE

    return result

# Run the checks on both metrics; the table below is only built when both pass
resume_exec = True

print("Testing FPR...")
resume_exec = test(fpr)
print()
print("Testing FNR...")
resume_exec = test(fnr) and resume_exec

# This part won't execute if the tests don't pass
if resume_exec:
    # Row labels of the table: one row per metric
    propublica_rows = [
        "Among non-recidivists: % of high scored (FPR)",
        "Among recidivists: % of low scored (FNR)",
    ]
    # One column per group, each rate shown as a truncated whole percentage
    propublica_columns = {
        group_name: [f"{int(fpr(group)*100)}%", f"{int(fnr(group)*100)}%"]
        for group_name, group in (('White', white_defendants), ('Black', black_defendants))
    }
    propublica_point = pd.DataFrame(data=propublica_columns, index=propublica_rows)
    display(propublica_point.style.set_caption("ProPublica metrics"))

Testing FPR...
🆗 Tests passed ! =)

Testing FNR...
🆗 Tests passed ! =)

def ppv_complement(data):
    """
    Return the complement to the Positive Predictive Value for the data, using is_recid and the decile_score (or predicted_recid)

    Among the rows with ``decile_score >= 5``, this is the fraction whose
    ``is_recid`` is not 1.

    :param data: DataFrame with the columns ``is_recid`` and ``decile_score``
    :return: 1 - TP / PP as a float; NaN when ``data`` has no row with
        ``decile_score >= 5`` (the value is undefined in that case)
    """
    ### YOUR CODE HERE
    # dataframe with defendants who were predicted to recidivate
    predicted_recidivate = data[data["decile_score"] >= 5] # SOLUTION

    # number of defendants who were predicted to recidivate (predicted positives)
    pp = predicted_recidivate.shape[0] # SOLUTION

    # number of defendants who actually recidivated among those predicted to recidivate (true positives)
    tp = predicted_recidivate[predicted_recidivate["is_recid"] == 1].shape[0] # SOLUTION

    # positive predictive value (NaN instead of a ZeroDivisionError when nobody is predicted positive)
    ppv = tp / pp if pp else float("nan") # SOLUTION
    ### END OF YOUR CODE

    return 1-ppv


def npv_complement(data):
    """
    Return the complement to the Negative Predictive Value for the data, using is_recid and the decile_score (or predicted_recid)

    Among the rows with ``decile_score < 5``, this is the fraction whose
    ``is_recid`` is not 0.

    :param data: DataFrame with the columns ``is_recid`` and ``decile_score``
    :return: 1 - TN / PN as a float; NaN when ``data`` has no row with
        ``decile_score < 5`` (the value is undefined in that case)
    """
    ### YOUR CODE HERE
    # dataframe with defendants who were predicted NOT to recidivate
    predicted_not_recidivate = data[data["decile_score"] < 5] # SOLUTION

    # number of defendants who were predicted not to recidivate (predicted negatives)
    pn = predicted_not_recidivate.shape[0] # SOLUTION

    # number of defendants who actually did NOT recidivate among those predicted NOT to recidivate (true negatives)
    tn = predicted_not_recidivate[predicted_not_recidivate["is_recid"] == 0].shape[0] # SOLUTION

    # negative predictive value (NaN instead of a ZeroDivisionError when nobody is predicted negative)
    npv = tn / pn if pn else float("nan") # SOLUTION
    ### END OF YOUR CODE

    return 1-npv

# Run the checks on both metrics; the table below is only built when both pass
resume_exec = True

print("Testing PPV complement...")
resume_exec = test(ppv_complement)
print()
print("Testing NPV complement..")
resume_exec = test(npv_complement) and resume_exec

# This part won't execute if the tests don't pass
if resume_exec:
    # Row labels of the table: one row per metric
    northpointe_rows = [
        "Among the high scored: % of non-recidivist (1-PPV)",
        "Among the low scored: % of recidivists (1-NPV)",
    ]
    # One column per group, each value shown as a truncated whole percentage
    northpointe_columns = {
        group_name: [f"{int(ppv_complement(group)*100)}%", f"{int(npv_complement(group)*100)}%"]
        for group_name, group in (('White', white_defendants), ('Black', black_defendants))
    }
    northpointe_point = pd.DataFrame(data=northpointe_columns, index=northpointe_rows)
    display(northpointe_point.style.set_caption("NorthPointe metrics"))

Testing PPV complement...
🆗 Tests passed ! =)

Testing NPV complement..
🆗 Tests passed ! =)

# Show both captioned tables one after the other so they can be compared.
# Both variables are only assigned inside the earlier `if resume_exec:` branches.
display(propublica_point.style.set_caption("ProPublica metrics"))
print()
display(northpointe_point.style.set_caption("NorthPointe metrics"))

scaler = StandardScaler()

# Time spent in jail, in seconds, as the difference between the two timestamp columns.
# The parsing is vectorised and timezone-free on purpose: naive `datetime.timestamp()`
# interprets values in the machine's local timezone, so durations spanning a DST change
# would depend on where the notebook is run.
jail_time = (
    pd.to_datetime(data["c_jail_out"], format="%Y-%m-%d %H:%M:%S")
    - pd.to_datetime(data["c_jail_in"], format="%Y-%m-%d %H:%M:%S")
).dt.total_seconds().rename("jail_time")

# Standardise the three numeric inputs together and keep the row index of `data`
raw_features = pd.concat(
    [
        data["priors_count"],
        data["juv_fel_count"],
        jail_time,
    ], axis='columns')
features = pd.DataFrame(
    scaler.fit_transform(raw_features),
    columns=['priors_count', 'juv_fel_count', 'jail_time'],
    index=data.index,
)
features.head()

# One indicator column per value of 'sex'; show 5 rows sampled with a fixed seed
pd.get_dummies(data['sex']).sample(5, random_state=123)

# Keep only the 'Female' indicator, under the name "is_female" (sampled with the same seed)
pd.DataFrame(
    {"is_female": pd.get_dummies(data['sex'])['Female']}
).sample(5, random_state=123)

# Add two indicator columns to `features`, taken from the dummies of 'sex' and 'c_charge_degree'
### YOUR CODE HERE
features["is_female"] = pd.get_dummies(data["sex"])["Female"] # SOLUTION
features["is_charge_felony"] = pd.get_dummies(data["c_charge_degree"])["F"] # SOLUTION
# END OF YOUR CODE

# Simply testing your values...
# (each check receives 5 values sampled with a fixed seed)
test_values(tuple(features["is_female"].sample(5, random_state=123)), "sex_indicator")
test_values(tuple(features["is_charge_felony"].sample(5, random_state=123)), "is_charge_felony_indicator")

features.head()

🆗 Tests passed ! =)
🆗 Tests passed ! =)

# Add the age-category and race indicator columns to `features`
### YOUR CODE HERE
# BEGIN SOLUTION
# "Less than 25" / "Greater than 45" dummies are stored as "age < 25" / "age > 45"
features[["age < 25", "age > 45"]] = pd.get_dummies(data["age_cat"])[["Less than 25", "Greater than 45"]]
# The six race dummies keep their original names
features[["African-American", "Caucasian", "Asian", "Hispanic", "Native American", "Other"]] = pd.get_dummies(data["race"])[["African-American", "Caucasian", "Asian", "Hispanic", "Native American", "Other"]]
# END SOLUTION
### END OF YOUR CODE

# The checks compare the sorted column names and the repr of 5 sampled rows,
# so both the values and their dtype matter here
test_values(sorted(features.columns), "naive features columns")
test_values(features[["age < 25", "age > 45"]].sample(5, random_state=246).to_numpy().__repr__(), "age_indicator")
test_values(features[["African-American", "Caucasian", "Asian", "Hispanic", "Native American", "Other"]].sample(5, random_state=5).to_numpy().__repr__(), "race_indicator")

features.head()

🆗 Tests passed ! =)
🆗 Tests passed ! =)
🆗 Tests passed ! =)

# Get the labels
labels = data["is_recid"].copy()
# The features are available in the "features" variable

# Create the model
model = LRModel(max_iter=1000) # Custom implementation that uses LogisticRegression from sklearn package

# Fit the model!
### YOUR CODE HERE
# BEGIN SOLUTION
model.fit(features, labels)
# END SOLUTION
### END OF YOUR CODE

# `score` receives the same features/labels that were passed to `fit`.
# NOTE(review): LRModel is a custom class; whether it splits the data internally is not visible here.
print(f"Model trained! Prediction accuracy: {model.score(features, labels)*100}%")
model.print_coefs(features.columns)

Model trained! Prediction accuracy: 65.76673866090714%

# /!\ mysterious_calculation only works if predictions are 0 or 1
def mysterious_calculation(predictions):
    """
    Divide the sum of the predictions by the number of predictions.

    :param predictions: Indicator of the predicted value for each sample (is 0 or 1 for each sample)
    :return: the sum of ``predictions`` divided by its length along the first axis
    """
    total = predictions.sum()
    count = predictions.shape[0]
    return total / count

# Row selectors for the two groups, based on the race indicator columns
is_black = features['African-American'] == 1
is_white = features['Caucasian'] == 1

# Apply the calculation to the model's predictions for each group
black_disparate = mysterious_calculation(model.predict(features[is_black], "test"))
white_disparate = mysterious_calculation(model.predict(features[is_white], "test"))

# Ratio of the White value to the Black value
disparate_impact_ratio = white_disparate / black_disparate

per_group = pd.DataFrame(
    {"White defendants": [white_disparate], "Black defendants": [black_disparate]},
    index=["Mysterious calculation"],
)
display(per_group)
print(f"Disparate impact ratio: {disparate_impact_ratio}")

Disparate impact ratio: 0.5055731046931408

### YOUR CODE HERE
features_no_race = features.drop(columns=["Other", "African-American", "Hispanic", "Asian", "Native American", "Caucasian"]) # SOLUTION
# END OF YOUR CODE

# The check compares the sorted names of the remaining columns
test_values(sorted(features_no_race.columns), "features_no_race")

features_no_race.head()

🆗 Tests passed ! =)

# Fit and report on the features without the race columns.
# NOTE(review): fit_and_display comes from rsrc.src; its exact behaviour is not visible in this file.
fit_and_display(features_no_race, labels, white_defendants, black_defendants)

Accuracy of the model: 67.170626349892%.

Disparate impact ratio: 0.5284669811320755

# Columns of the raw data (it now also holds 'predicted_recid', added in the accuracy cell)
data.columns

Index(['id', 'name', 'first', 'last', 'sex', 'dob', 'age', 'age_cat', 'race',
       'juv_fel_count', 'decile_score', 'juv_misd_count', 'juv_other_count',
       'priors_count', 'c_jail_in', 'c_jail_out', 'c_offense_date',
       'c_arrest_date', 'c_charge_degree', 'c_charge_desc', 'is_recid',
       'in_custody', 'out_custody', 'predicted_recid'],
      dtype='object')

# Columns of `data` that are not part of `features` yet
not_preprocessed_features = ['juv_misd_count', 'juv_other_count']

# Standardise them (this refits the shared `scaler`), aligned on the index of `features`
scaled_extra = pd.DataFrame(
    scaler.fit_transform(data[not_preprocessed_features]),
    columns=not_preprocessed_features,
    index=features.index,
)

# Work on a copy so that `features` itself is left untouched
pre_processed_features = features.copy()
pre_processed_features[not_preprocessed_features] = scaled_extra

display(pre_processed_features.head())
print(f"Features at disposal: {list(pre_processed_features.columns)}")

Features at disposal: ['priors_count', 'juv_fel_count', 'jail_time', 'is_female', 'is_charge_felony', 'age < 25', 'age > 45', 'African-American', 'Caucasian', 'Asian', 'Hispanic', 'Native American', 'Other', 'juv_misd_count', 'juv_other_count']

# Hand-picked subset of the columns of `pre_processed_features` used for this attempt
### YOUR CODE HERE
selected_features = ['jail_time', 'is_charge_felony', 'juv_fel_count', 'juv_misd_count', 'juv_other_count'] # SOLUTION
### END OF YOUR CODE

# Fit and report using only the selected columns
fit_and_display(pre_processed_features[selected_features], labels, white_defendants, black_defendants)

Accuracy of the model: 60.259179265658744%.

Disparate impact ratio: 0.6572294776119403

# Correlation of every feature with the label and with the two race indicators
race_columns = ['African-American', 'Caucasian', 'Asian', 'Hispanic', 'Native American', 'Other']
correlations = pd.DataFrame({
    "is_recid": features.corrwith(labels),
    "African-American": features.corrwith(features["African-American"]),
    "Caucasian": features.corrwith(features['Caucasian']),
})

# we drop the races as it is irrelevant
correlations_without_races = correlations.drop(race_columns)

# Cell colour is derived from the absolute correlation (green/blue channels shrink as it grows)
display(
    correlations_without_races
    .style.map(lambda e: f"color: rgb(255, {255 - abs(e)*255/0.3}, {255 - abs(e)*255/0.3})")
)

# Subset of `features` chosen from the correlation table shown above
### YOUR CODE HERE
most_uncorr_features = features[['juv_fel_count', 'jail_time', 'is_charge_felony', 'is_female']] # SOLUTION

# Fit and report using only this subset
fit_and_display(most_uncorr_features, labels, white_defendants, black_defendants)

Accuracy of the model: 60.69114470842333%.

Disparate impact ratio: 0.768615107913669

# NOTE: this rebinds `model` (previously an LRModel) to a plain sklearn LogisticRegression
model = LogisticRegression(max_iter=1000)

# Wrap the estimator in aif360's ExponentiatedGradientReduction with a "DemographicParity"
# constraint on the 'African-American' column; the protected column is kept as an input
# (drop_prot_attr=False)
model_constrained = ExponentiatedGradientReduction(estimator=model, prot_attr='African-American', constraints="DemographicParity", drop_prot_attr=False)

# Fit and report with the constrained model passed in place of the default one
fit_and_display(features, labels, white_defendants, black_defendants, model=model_constrained)

Accuracy of the model: 65.48930654569021%.

Disparate impact ratio: 0.9921504739336492

### YOUR CODE HERE
# Counts of is_recid (rows) per race (columns), restricted to the two groups studied
recid_stats = pd.crosstab(data['is_recid'], data['race'])[["Caucasian", "African-American"]] # SOLUTION
display(recid_stats)
### END OF YOUR CODE

# checking you have the correct values
test_values(list((recid_stats.to_numpy().flatten())), "recid_stats")

# Compute the percentages
# Each count is replaced in place by the string "<count> (<share of its column>%)"
print("\nPercentages:")
for col in recid_stats.columns:
    # Column total computed once, before the column is overwritten with strings
    col_total = recid_stats[col].sum()
    recid_stats[col] = recid_stats[col].map(lambda e, total=col_total: f"{e} ({round(e*100 / total)}%)")
recid_stats

🆗 Tests passed ! =)

Percentages:

# Move the 'African-American' column into the index (appended to the existing index),
# then fit aif360's LearnedFairRepresentations on the float-cast frame and transform it
features_fair = features.set_index('African-American', append=True)
lfr = LearnedFairRepresentations('African-American', n_prototypes=10, max_iter=4, random_state=9876)
features_fair = lfr.fit_transform(features_fair.astype(np.float64), labels)

### YOUR CODE HERE
# BEGIN SOLUTION NO PROMPT
display(features.head().style.set_caption('Before the manipulation'))
print()
display(features_fair.head().style.set_caption('After the manipulation'))
# END SOLUTION
""" # BEGIN PROMPT
display(...)
display(...)
"""; # END PROMPT

# Fit and report on the transformed features
fit_and_display(features_fair, labels, white_defendants, black_defendants)

Accuracy of the model: 63.498920086393085%.

Disparate impact ratio: 1.0345588235294119

# Compute per-sample weights with aif360's Reweighing on the 'African-American' attribute.
# The attribute is moved into the index first; only the second value returned by
# fit_transform (the weights) is kept.
# NOTE(review): Helper.get_train_samples presumably selects the training rows - confirm in rsrc.src.
rw = Reweighing("African-American")
_, weights = rw.fit_transform(Helper.get_train_samples(features.set_index('African-American', append=True)), Helper.get_train_samples(labels))

# Fit with these weights and keep the fitted model (return_model=True)
model = fit_and_display(features.set_index('African-American', append=True), labels, white_defendants, black_defendants, weights=weights, return_model=True)

Accuracy of the model: 65.65874730021598%.

Disparate impact ratio: 0.8700327510917031

# Count how many samples share each distinct weight value, with readable column names
weights_summary = pd.DataFrame(weights).value_counts().reset_index().rename(columns={0: 'Weight', 'count': 'Number of samples'})
display(weights_summary)

	id	name	first	last	sex	dob	age	age_cat	race	...	priors_count	c_jail_in	c_jail_out	c_offense_date	c_arrest_date	c_charge_degree	c_charge_desc	is_recid	in_custody	out_custody
0	1	miguel hernandez	miguel	hernandez	Male	1947-04-18	69	Greater than 45	Other	...	0	2013-08-13 06:03:42	2013-08-14 05:41:20	2013-08-13	NaN	F	Aggravated Assault w/Firearm	0	2014-07-07	2014-07-14
1	3	kevon dixon	kevon	dixon	Male	1982-01-22	34	25 - 45	African-American	...	0	2013-01-26 03:45:27	2013-02-05 05:36:53	2013-01-26	NaN	F	Felony Battery w/Prior Convict	1	2013-01-26	2013-02-05
2	4	ed philo	ed	philo	Male	1991-05-14	24	Less than 25	African-American	...	4	2013-04-13 04:58:34	2013-04-14 07:02:04	2013-04-13	NaN	F	Possession of Cocaine	1	2013-06-16	2013-06-16
5	7	marsha miles	marsha	miles	Male	1971-08-22	44	25 - 45	Other	...	0	2013-11-30 04:50:18	2013-12-01 12:28:56	2013-11-30	NaN	M	Battery	0	2013-11-30	2013-12-01
6	8	edward riddle	edward	riddle	Male	1974-07-23	41	25 - 45	Caucasian	...	14	2014-02-18 05:08:24	2014-02-24 12:18:30	2014-02-18	NaN	F	Possession Burglary Tools	1	2014-03-31	2014-04-18

	priors_count	juv_fel_count	jail_time
0	-0.684413	-0.127923	-0.302617
1	-0.684413	-0.127923	-0.107825
2	0.158866	-0.127923	-0.300447
5	-0.684413	-0.127923	-0.295461
6	2.267065	-0.127923	-0.188773

	Coefficients of the model
priors_count	0.849837
juv_fel_count	0.058701
jail_time	0.106816
is_female	-0.371412
is_charge_felony	0.117323
age < 25	0.770163
age > 45	-0.738011
African-American	0.172767
Caucasian	0.055662
Asian	-0.008043
Hispanic	-0.177392
Native American	-0.012142
Other	-0.150954

	is_recid	African-American	Caucasian
priors_count	0.294522	0.215184	-0.145093
juv_fel_count	0.085108	0.057850	-0.052881
jail_time	0.108439	0.059794	-0.043677
is_female	-0.110840	-0.045781	0.071087
is_charge_felony	0.115557	0.104047	-0.077574
age < 25	0.112593	0.091101	-0.092665
age > 45	-0.143709	-0.157048	0.157450

	Caucasian	African-American
is_recid
0	# White non recid	# Black non recid
1	# White recid	# Black recid

Fairness Week 2: Predicting recidivism¶

Introduction¶

Setup and dataset exploration¶

The dataset¶

Accuracy¶

Groups in the dataset¶

Part 1. Evaluating fairness, not trivial.¶

1.1 - ProPublica's perspective¶

1.2 - Northpointe's perspective¶

1.3 - So, who is right?¶

Part 2. A design matter?¶

2.1 - Creating a Logistic Regression model¶

Data preprocessing¶

Fitting a LR model¶

2.2 - Evaluating fairness with the Disparate Impact Ratio¶

2.3 - Trying to improve fairness¶

Removing the race in the input¶

A more brute-force attempt to remove the bias¶

Using the correlations in order to select the best features¶

[Optional] 2.4 - Constraining the model¶

Part 3. Troubles from the data¶

3.1 - Prevalence of recidivism across groups¶

[Optional] 3.2 - Solutions against biased datasets¶

Transformation¶

Reweighing¶

Synthesis¶

Conclusion¶

	White	Black
Among non-recidivists: % of high scored (FPR)	21%	41%
Among recidivists: % of low scored (FNR)	50%	29%

	White	Black
Among the high scored: % of non-recidivist (1-PPV)	38%	31%
Among the low scored: % of recidivists (1-NPV)	31%	39%

	Female	Male
3983	True	False
3406	True	False
6001	False	True
4423	True	False
49	False	True

		priors_count	juv_fel_count	jail_time	is_female	is_charge_felony	age < 25	age > 45	Caucasian	Asian	Hispanic	Native American	Other
	African-American
0	False	0.507218	0.475127	0.466003	0.422267	0.424015	0.681114	0.621424	0.549810	0.734242	0.543436	0.555198	0.508463
1	True	0.516072	0.452611	0.467314	0.433455	0.407253	0.677141	0.603751	0.559123	0.746399	0.524001	0.575254	0.478592
2	True	0.541727	0.435203	0.436117	0.413283	0.408951	0.706159	0.612333	0.573176	0.737660	0.519425	0.587610	0.457087
5	False	0.506106	0.459035	0.482171	0.430548	0.386951	0.670566	0.590508	0.552366	0.739659	0.513164	0.580058	0.508807
6	False	0.590830	0.425352	0.438339	0.413150	0.420533	0.685877	0.586998	0.605859	0.730453	0.468725	0.604520	0.449076

race	Caucasian	African-American
is_recid
0	1229	1402
1	874	1773

race	Caucasian	African-American
is_recid
0	1229 (58%)	1402 (44%)
1	874 (42%)	1773 (56%)

	Weight	Number of samples
0	0.869000	1511
1	0.868478	1505
2	1.164676	1202
3	1.192550	1028