%load_ext autoreload
%autoreload 2

# import libraries
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shap
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# delete warning
import warnings
warnings.filterwarnings('ignore')

from res.utils import *

import otter
test = otter.Notebook()

# Load the dataset used for the exploratory plots.
# (load_dataset_eda is not defined in this notebook; presumably star-imported from res.utils.)
data = load_dataset_eda()
data.head()

# Column names of `data` that are drawn as count plots (categorical) and
# histograms (numerical) in the exploration cells that follow.
categorical_features =  ['Loan granted', 'Relationship', 'Marital Status', 'Race', 'Sex',]
numerical_features = ['Age', 'Education-Num', 'Capital Gain', 'Capital Loss', 'Hours per week']

# One count plot per categorical column, laid out side by side in a single row.
fig, axs = plt.subplots(nrows=1, ncols=len(categorical_features), figsize=(16, 5))
for ax, col in zip(axs, categorical_features):
    sns.countplot(data=data, x=col, ax=ax)
    ax.set_title(col)
    # Category names on the x axis are rotated by 90 degrees.
    ax.tick_params(axis='x', rotation=90)
plt.tight_layout()
plt.show()

# One histogram (with a KDE curve overlaid) per numerical column, in a single row.
fig, axs = plt.subplots(nrows=1, ncols=len(numerical_features), figsize=(16, 4))
for ax, col in zip(axs, numerical_features):
    sns.histplot(data[col], kde=True, ax=ax)
    ax.set_title(col)
plt.tight_layout()
plt.show()

# load dataset
# (load_dataset_model is not defined in this notebook; presumably star-imported from res.utils.)
df = load_dataset_model()
df.head()

# define features and label
features = ['Workclass', 'Education-Num', 'Marital Status', 'Occupation', 'Relationship', 'Capital Gain', 'Capital Loss', 'Hours per week']
label = 'Loan granted'

# separate features from label
# .copy() makes X an independent frame: the in-place scaling below then writes
# to X itself rather than to a selection of df (avoids pandas' chained-assignment
# ambiguity / SettingWithCopyWarning).
X = df[features].copy()
y = df[label].tolist()

# scale all features to [0, 1]
# NOTE(review): the scaler is fitted on the full dataset before the split, so the
# min/max of test rows leak into the scaling. Left unchanged because the saved MLP
# weights and precomputed SHAP values presumably rely on this exact scaling — confirm
# before moving the fit after the split.
scaler = MinMaxScaler()
X[features] = scaler.fit_transform(X[features])

# split into train and test datasets (80% / 20%, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f'Length of train set: {len(X_train)} samples')
print(f'Length of test set: {len(X_test)} samples')

Length of train set: 23336 samples
Length of test set: 5834 samples

# Logistic regression model, defined as "lr_model"
# Created with the estimator's default hyperparameters and fitted on the
# training split (scaled features, list of labels).
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)

LogisticRegression()

import torch
import torch.nn as nn

# Define the neural network
class SmallMLP(nn.Module):
    """Small fully connected binary classifier with a sigmoid output.

    The layer stack is 8 -> 16 -> 256 -> 16 -> 1 with ReLU activations between
    the linear layers and a final sigmoid, so ``forward`` returns values in
    [0, 1].  The ``predict*`` helpers wrap ``forward`` for NumPy / pandas
    callers; they run without gradient tracking and return NumPy arrays.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, 256),
            nn.ReLU(),
            nn.Linear(256, 16),
            nn.ReLU(),
            nn.Linear(16, 1),
            nn.Sigmoid()
        )

    @staticmethod
    def _to_tensor(x):
        """Convert a pandas object or array-like into a float32 tensor."""
        # torch.tensor cannot consume a DataFrame/Series directly; pandas
        # objects expose to_numpy(), plain arrays and lists do not.
        if hasattr(x, "to_numpy"):
            x = x.to_numpy()
        return torch.tensor(x, dtype=torch.float32)

    def forward(self, x):
        """Run the layer stack on a float tensor whose last dimension is 8."""
        return self.model(x)

    def predict_proba(self, x):
        """Return raw predictions between 0 and 1 as a NumPy array.

        Accepts a NumPy array (or other array-like) as well as a pandas
        DataFrame.  For 2-D input of shape (n, 8) the result has shape (n, 1).
        """
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            return self.forward(self._to_tensor(x)).numpy()

    def predict_proba_row(self, x):
        """Return raw predictions between 0 and 1 with singleton dimensions removed.

        For a DataFrame of n > 1 rows the result has shape (n,); for a single
        row (one-row DataFrame or Series) it is a 0-d array.
        """
        with torch.no_grad():
            return self.forward(self._to_tensor(x)).squeeze().numpy()

    def predict(self, x):
        """Return binary predictions (True/False): probability strictly above 0.5.

        The result keeps the shape of ``predict_proba`` (i.e. (n, 1) for n rows).
        """
        return self.predict_proba(x) > 0.5

# import torch.optim as optim
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler


# device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

# # Convert data to tensors
# X_train_tensor = torch.tensor(X.values, dtype=torch.float32)
# y_train_tensor = torch.tensor(y, dtype=torch.float32)

# # Split the data into train and validation sets
# X_train, X_val, y_train, y_val = train_test_split(X_train_tensor, y_train_tensor, test_size=0.2, random_state=42)

# # Move data to the selected device
# X_train, X_val = X_train.to(device), X_val.to(device)
# y_train, y_val = y_train.to(device), y_val.to(device)

# # Create a DataLoader
# train_dataset = TensorDataset(X_train, y_train)
# train_loader = DataLoader(train_dataset, batch_size=32)

# # Instantiate the model
# model = SmallMLP().to(device)  # Move the model to the selected device

# # Define loss and optimizer
# criterion = nn.BCELoss()
# optimizer = optim.Adam(model.parameters(), lr=0.00005)

# # Training loop
# num_epochs = 100

# best_val_loss = float('inf')

# for epoch in range(num_epochs):
#     model.train()
#     total_loss = 0

#     for batch_X, batch_y in train_loader:
#         optimizer.zero_grad()
        
#         outputs = model(batch_X).squeeze()
#         loss = criterion(outputs, batch_y)
        
#         loss.backward()
#         optimizer.step()
        
#         total_loss += loss.item()

#     # Validation
#     model.eval()
#     with torch.no_grad():
#         val_outputs = model(X_val).squeeze()
#         val_loss = criterion(val_outputs, y_val)
#         val_preds = (val_outputs > 0.5).float()
#         accuracy = (val_preds == y_val).float().mean()
#         if val_loss < best_val_loss:
#             best_val_loss = val_loss
#             torch.save(model.state_dict(), 'small_mlp_dict.pth')
#             print(f"Model saved at epoch {epoch+1}")
    
#     print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader):.4f}, Val Loss: {val_loss.item():.4f}, Val Accuracy: {accuracy.item():.4f}")

# Load the model
# The checkpoint is passed to load_state_dict, i.e. it is expected to hold a state dict.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
mlp_model = SmallMLP()
mlp_model.load_state_dict(
    torch.load(
        'models/small_mlp.pth',
        map_location=torch.device('cpu'),
        weights_only=True,
    )
)
# Switch the module to evaluation mode before using it for predictions.
mlp_model.eval()

SmallMLP(
  (model): Sequential(
    (0): Linear(in_features=8, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=16, bias=True)
    (5): ReLU()
    (6): Linear(in_features=16, out_features=1, bias=True)
    (7): Sigmoid()
  )
)

from sklearn.metrics import accuracy_score

# Predictions
# Both models are evaluated on the same held-out test split.
y_lr = lr_model.predict(X_test)
y_mlp = mlp_model.predict(X_test)

# Compute accuracy
### YOUR CODE HERE ###
lr_acc = accuracy_score(y_test, y_lr) # SOLUTION
mlp_acc = accuracy_score(y_test, y_mlp) # SOLUTION
#####################

# Report both accuracies as percentages with two decimals.
print(f"Logistic regression model accuracy: {lr_acc*100:.2f}%")
print(f"Neural network model accuracy: {mlp_acc*100:.2f}%")

Logistic regression model accuracy: 84.06%
Neural network model accuracy: 84.33%

# Run the otter-grader check named "accuracy" (test is the otter.Notebook created at the top).
test.check("accuracy")

accuracy

from sklearn.metrics import confusion_matrix

# Confusion matrices of both models on the test split
# (scikit-learn convention: rows are true labels, columns are predicted labels).
### YOUR CODE HERE ###
cm_lr = confusion_matrix(y_test, y_lr) # SOLUTION
cm_mlp = confusion_matrix(y_test, y_mlp) # SOLUTION
######################

# Show both matrices: logistic regression first, then the neural network.
display(cm_lr)
display(cm_mlp)

array([[4128,  275],
       [ 655,  776]])

array([[4095,  308],
       [ 606,  825]])

# Run the otter-grader check named "confusion_matrix".
test.check("confusion_matrix")

confusion_matrix

# Plot the two confusion matrices computed above
# (helper not defined in this notebook; presumably star-imported from res.utils).
plot_confusion_matrices(cm_lr, cm_mlp)

# Feature importance analysis for the fitted logistic regression model, given
# the training features (helper presumably from res.utils as well).
plot_feature_importance_analysis(lr_model, X_train)

# explainer = shap.KernelExplainer(mlp_model.predict_proba, shap.kmeans(X_train, 100) )

# shap_values = explainer(X_test)

# with open('kernel_shap_values.pkl', 'wb') as f:
#     pickle.dump(shap_values, f)
    
# print('SHAP values saved')

# shap_values.values = shap_values.values.squeeze()

# shap.plots.bar(shap_values)

# build the Kernel SHAP explainer for the MLP's probability output; the
# background data is X_train summarised by k-means into 100 points
explainer = shap.KernelExplainer(mlp_model.predict_proba, shap.kmeans(X_train, 100))

# load the precomputed SHAP values
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# this file when it comes from a trusted source.
with open('res/kernel_shap_values.pkl', 'rb') as f:
    shap_values = pickle.load(f)
print('SHAP values loaded')

# display
# squeeze() drops singleton dimensions from the values array before the
# global bar plot is drawn
shap_values.values = shap_values.values.squeeze()
shap.plots.bar(shap_values)

SHAP values loaded

# Applicant 363: take the feature vector stored with the SHAP values, map it
# back through the scaler to the original units and round to whole numbers.
explanation_363 = shap_values[363]
applicant_363 = scaler.inverse_transform([explanation_363.data]).round()
applicant_363_df = pd.DataFrame(applicant_363, index=['363'], columns=features)
display(applicant_363_df)

# Waterfall plot of this applicant's SHAP values (up to 20 features shown).
shap.plots.waterfall(explanation_363, max_display=20)

# Index into shap_values of the customer examined in the following cells.
customer_id = 1113 # SOLUTION

# retrieve the data of applicant 1113 and unscale them
# (the variable names keep the 1113 suffix, but the row is selected through customer_id)
applicant_1113 = scaler.inverse_transform([shap_values[customer_id].data]).round()
applicant_1113_df = pd.DataFrame(applicant_1113, columns=features, index=[customer_id])
print(f'Data for customer {customer_id}:')
display(applicant_1113_df)

# plot the shap values
# Waterfall plot of the per-feature SHAP values for this customer (up to 20 features shown).
print(f'SHAP values for customer {customer_id}:')
shap.plots.waterfall(shap_values[customer_id], max_display=20)

Data for customer 1113:

SHAP values for customer 1113:

### PARAMETERS ###
feature_to_change = "Capital Gain" # SOLUTION
new_value = 2000 # SOLUTION
##################
# Re-run the explanation for the selected customer with one feature replaced.
# simulation_customer is not defined in this notebook (presumably from res.utils);
# judging by the recorded output it shows the previous data, the modified data
# and the new SHAP values. new_value is presumably given in original (unscaled)
# units since the scaler is passed along — confirm against the helper.
simulation_customer(customer_id, mlp_model, shap_values, explainer, X_test, feature_to_change, new_value, scaler)

Previous data for customer 1113:

Modified data for customer 1113:

New SHAP values for customer 1113:

  0%|          | 0/1 [00:00<?, ?it/s]

### Your results here ###
# solution = {"Feature name": value}
# Each dictionary holds a single feature name mapped to its proposed new value.
first_solution = {"Capital Gain": 1600.0} # SOLUTION
second_solution = {"Occupation": 3.0} # SOLUTION
third_solution = {"Education-Num": 13.0} # SOLUTION

# Check the three proposed changes for the selected customer
# (test_simulation is not defined in this notebook; presumably from res.utils).
test_simulation(first_solution, second_solution, third_solution, customer_id, mlp_model, X_test, scaler)

The first solution is correct
The second solution is correct
The third solution is correct

# Waterfall plot of the SHAP values for the selected customer (up to 20 features shown).
shap.plots.waterfall(shap_values[customer_id], max_display=20)

### PARAMETERS ###
feature_to_change = "Capital Loss" # SOLUTION
new_value = 1000 # SOLUTION
##################
# Same simulation as before, this time changing the customer's "Capital Loss"
# (simulation_customer is not defined in this notebook; presumably from res.utils).
simulation_customer(customer_id, mlp_model, shap_values, explainer, X_test, feature_to_change, new_value, scaler)

Previous data for customer 1113:

Modified data for customer 1113:

New SHAP values for customer 1113:

  0%|          | 0/1 [00:00<?, ?it/s]

# Judging by its name, plots the distribution of capital loss in the modelling
# dataset df (helper not defined in this notebook; presumably from res.utils).
plot_capital_loss_distribution(df)

	Age	Workclass	Education-Num	Marital Status	Occupation	Relationship	Race	Sex	Capital Gain	Hours per week	Loan granted
0	39.0	State-gov	13.0	Never-married	Adm-clerical	Not-in-family	White	Male	2174.0	40.0	False
1	50.0	Self-emp-not-inc	13.0	Married-civ-spouse	Exec-managerial	Husband	White	Male	0.0	13.0	False
2	38.0	Private	9.0	Divorced	Handlers-cleaners	Not-in-family	White	Male	0.0	40.0	False
3	53.0	Private	7.0	Married-civ-spouse	Handlers-cleaners	Husband	Black	Male	0.0	40.0	False
5	37.0	Private	14.0	Married-civ-spouse	Exec-managerial	Wife	White	Female	0.0	40.0	False

Feature	Description	Value Range / Categories
Age	Age of the person in years.	[17, 90]
Workclass	Industry sector of employment.	Private, Self-emp-not-inc, Self-emp-inc, Federal-gov, Local-gov, State-gov, Without-pay, Never-worked
Education-Num	Highest level of education achieved, represented by a numerical code (/!\ It does not correspond to years of education)	[1, 16]
Marital Status	Marital status.	Married-civ-spouse (civilian spouse), Divorced, Never-married, Separated, Widowed, Married-spouse-absent, Married-AF-spouse (Armed Forces spouse)
Occupation	Category of occupation.	Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty, Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, Priv-house-serv, Protective-serv, Armed-Forces
Relationship	Relationship status (somewhat redundant with marital status).	Wife, Own-child, Husband, Not-in-family, Other-relative, Unmarried
Race	Race category.	White, Asian-Pac-Islander, Amer-Indian-Eskimo, Other, Black
Sex	Biological sex.	Female, Male
Capital Gain	Capital gain in the previous year.	[0.0, 99999.0]
Capital Loss	Capital loss in the previous year.	[0.0, 4356.0]
Hours per week	Number of working hours per week.	[1, 99]
Loan granted	Loan obtained or not by the person (label that the model learns to predict).	True, False

	penalty	'l2'
	dual	False
	tol	0.0001
	C	1.0
	fit_intercept	True
	intercept_scaling	1
	class_weight	None
	random_state	None
	solver	'lbfgs'
	max_iter	100
	multi_class	'deprecated'
	verbose	0
	warm_start	False
	n_jobs	None
	l1_ratio	None

Empowerment Week 2 Exercise: Explainability and Transparency

Introduction

Part 1: Choosing the best ML model

1.1 Load and explore the dataset

1.2 Train two classification models

1.3 Which has the best accuracy?

1.4 Is accuracy enough?

Part 2: Explaining model decisions

2.1 Feature Importance Analysis

2.2 Explainable AI (XAI) with SHAP

2.2.1 Generating the SHAP values for our model

2.2.2 Global explanation: feature importance

2.2.3 Local explanation: focusing on one decision

2.3 [Optional] Spreading trust among users

2.4 Model debugging

Part 3: Limitations of Explainable AI

3.1 Post-Hoc Explainability

3.2 Towards Self-Explainable Neural Networks

Challenges with Self-Explaining Neural Networks:

3.3 What Should We Do in Practice?

For Simpler Models:

For More Complex Models:

Final Recommendation:

Congratulations! You have finished this notebook!