import pandas as pd
import os
# Build the path to the sample sentences in a platform-independent way
path_catDogText = os.path.join('res', 'catDogText.csv')
# Pandas reads the contents of the file and stores it into a "DataFrame", which is a table.
# The file is parsed with ';' as the column separator instead of the default ','.
sentences = pd.read_csv(path_catDogText, sep = ';')
# Bare expression at the end of the cell, so that a notebook displays the table
sentences

def has_banned_word(sentence_to_scan, banned_word):
    """ Tell whether banned_word occurs anywhere inside sentence_to_scan.

    Both strings are lower-cased before comparing, and the search is a plain
    substring search: the banned word is also found when it is only a part
    of a longer word.

    Arguments:
    - sentence_to_scan: the text (a string) to look into
    - banned_word: the string to look for
    Returns:
    - True if banned_word appears at least once in sentence_to_scan
    - False otherwise
    """
    ### YOUR CODE HERE
    needle = banned_word.lower() # SOLUTION
    haystack = sentence_to_scan.lower() # SOLUTION
    result = needle in haystack # SOLUTION
    ### END OF YOUR CODE

    return result

from res.tests import *
test(has_banned_word)

🆗 Tests passed ! =)

# banned word to look for
banned = "dogs"

# iterating over the rows in the dataframe: `i` is the index of the row, `row` the row itself
for i, row in sentences.iterrows():
    # getting the sentence in that row
    sentence = row['text']
          
    ### YOUR CODE HERE
    # checking whether the sentence contains the banned word
    contains_banned_word = has_banned_word(sentence, banned) # SOLUTION
    ### END OF YOUR CODE
    
    # Storing the result in the table, in the 'contains_banned_word' column of the current row
    sentences.at[i, 'contains_banned_word'] = contains_banned_word 

# show the result, highlighting in red the sentences that contain the banned word
# (the lambda returns one CSS string per cell of the row: red for flagged rows, empty otherwise)
sentences.style.apply(lambda r: ['color: red'] * len(r) if r['contains_banned_word'] else [''] * len(r), axis=1)

def has_word_from_list(sentence_to_scan, list_of_words):
    """ has_word_from_list checks whether the sentence_to_scan contains any words from the list_of_words

      Each word is checked with has_banned_word, and the scan stops at the first match.

      Arguments:
      - sentence_to_scan: a string of words to scan
      - list_of_words: an iterable of banned words (strings) to scan for

      Returns:
      - True if sentence_to_scan contains any words from the list_of_words
      - False otherwise (including when list_of_words is empty)
    """
    for banned_word in list_of_words:
    ### YOUR CODE HERE
        # BEGIN SOLUTION
        # One match is enough: return immediately without testing the remaining words
        if has_banned_word(sentence_to_scan, banned_word):
            return True
    # The loop ended without finding any word from the list
    return False
        # END SOLUTION

test(has_word_from_list)

🆗 Tests passed ! =)

# retrieving the list of words that are banned
path_bannedWords = os.path.join('res', 'bannedWords.csv')
list_of_banned_words = pd.read_csv(path_bannedWords, sep = ';')['text']
list_of_banned_words

0      dawgs
1        dog
2       dogs
3      doggo
4    doggies
Name: text, dtype: object

# reading the test file with slang (parsed with ';' as the column separator)
path_dog_variants = os.path.join('res', 'dogVariants.csv')
sentences = pd.read_csv(path_dog_variants, sep = ';')
  
# iterating over the rows in the dataframe, `i` is the index of the row, and `row` contains the row itself 
for i, row in sentences.iterrows():
    # getting the sentence in that row
    sentence = row['text']
          
    ### YOUR CODE HERE
    # checking whether the sentence contains any of the banned words
    contains_banned_word = has_word_from_list(sentence, list_of_banned_words) # SOLUTION
    ### END OF YOUR CODE
    
    # Storing the result in the table, in the 'contains_banned_word' column of the current row
    sentences.at[i, 'contains_banned_word'] = contains_banned_word 

# show the result, highlighting in red the sentences that contain a banned word
sentences.style.apply(lambda r: ['color: red'] * len(r) if r['contains_banned_word'] else [''] * len(r), axis=1)

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

nltk.download('vader_lexicon')

# We instantiate the sentiment analyzer
sia = SentimentIntensityAnalyzer()

# Example of its usage
ps = sia.polarity_scores("This person is good")
ps

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/mac/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!

{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}

ps['neu']

0.508

def is_negative_statement(statement, sia=sia):
    """ Tell whether the sentiment analyzer rates the statement as negative.

    Arguments:
    - statement: the string to analyze
    - sia: the sentiment analyzer to use; when omitted, the `sia` object that
      existed at the time this function was defined is used

    Returns:
    - True if the compound polarity score of the statement is strictly below 0
    - False if it is 0 or above (neutral or positive statement)
    """

    ### YOUR CODE HERE
    scores = sia.polarity_scores(statement) # SOLUTION
    result = scores['compound'] < 0 # SOLUTION
    ### END OF YOUR CODE

    return result

test(is_negative_statement)

🆗 Tests passed ! =)

path_catNegativity = os.path.join('res', 'catNegativity.csv')
sentences = pd.read_csv(path_catNegativity, sep = ";")
print(f'Number of sentences: {len(sentences)}')
sentences.head() #shows the first 5 rows of the dataframe

Number of sentences: 6

# iterating over the rows in the dataframe: `i` is the index of the row, `row` the row itself
for i, row in sentences.iterrows():
    # getting the sentence in that row
    sentence = row['text']
    
    ### YOUR CODE HERE
    # checking whether the sentence has a negative compound polarity score
    is_negative = is_negative_statement(sentence, sia) # SOLUTION
    ### END OF YOUR CODE

    # Storing the result in the table, in the 'negativity' column of the current row
    sentences.at[i, 'negativity'] = is_negative

# let's apply some coloring to better visualize the result: negative sentences are shown in red
sentences.style.apply(lambda r: ['color: red'] * len(r) if r['negativity'] else [''] * len(r), axis=1)

from datasets import load_dataset

# dataset that we will use to evaluate our model
dataset = load_dataset("tweet_eval", "offensive")
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 11916
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 860
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1324
    })
})

dataset["train"][0]

{'text': '@user Bono... who cares. Soon people will understand that they gain nothing from following a phony celebrity. Become a Leader of your people instead or help and support your fellow countrymen.',
 'label': 0}

import nltk
from nltk.corpus import opinion_lexicon

# Retrieve the list of negative words
nltk.download('opinion_lexicon')
negative_words = opinion_lexicon.negative()

# Show a few examples
negative_words

[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     /Users/mac/nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!

['2-faced', '2-faces', 'abnormal', 'abolish', ...]

# BEGIN SOLUTION NO PROMPT
# Function that tests whether a text contains one of the negative words
def has_negative_word(negative_words, text):
    """ Checks whether the text contains at least one of the negative words

    The text is lower-cased before the search and each word is looked up as a
    plain substring, so a word is also found inside a longer word.

    Arguments:
    - negative_words: iterable of words to look for (expected in lower case, like the lexicon)
    - text: the string to scan

    Returns:
    - 1 if at least one of the negative words occurs in the lower-cased text
    - 0 otherwise (including when negative_words is empty)
    """
    # We put the text in lower case because the lexicon is in lower case.
    # This is done once, before the loop, instead of once per lexicon entry.
    lowered_text = text.lower()
    # Iterate over all negative words
    for negative_word in negative_words:
        if negative_word in lowered_text:
            # Interrupt the loop and return 1 as soon as we find one negative word
            return 1
    # Return 0 if we didn't find any negative word
    return 0

# END SOLUTION
""" # BEGIN PROMPT
def has_negative_word(negative_words, text):
    for negative_word in negative_words:
        if negative_word in text.lower():
            return 1
    return 0
"""; # END PROMPT

# Initialize two lists for storing the predictions and the labels
predictions = []
labels = []

# Iterate over all texts in the test dataset
for sample in dataset["test"]:
    # Store the result of the function indicating whether the sample contains a negative word (1) or not (0)
    predictions.append(has_negative_word(negative_words, sample["text"]))
    # Store the actual label 
    labels.append(sample["label"])

# Build a two-column dataframe with the results (one row per sample, in the same order)
comparisons = pd.DataFrame({'prediction': predictions, 'label': labels})

# Let's have a look at a few rows from the result:
comparisons.sample(5, random_state=30) # Draws 5 random rows among the dataframe (the random_state parameter ensures the reproducibility of the code)

def compute_accuracy_score(comparisons):
    """ Computes the share of rows whose prediction equals the label

    Arguments:
    - comparisons: a dataframe with two columns labelled 'prediction' and 'label'

    Returns:
    - accuracy: number of rows where 'prediction' == 'label', divided by the total number of rows
    """

    ### YOUR CODE HERE
    is_correct = comparisons["prediction"] == comparisons["label"] # SOLUTION
    correct_pred = int(is_correct.sum()) # SOLUTION
    total_pred = len(is_correct) # SOLUTION

    accuracy = correct_pred / total_pred # SOLUTION
    ### END OF YOUR CODE

    return accuracy

print(f'Accuracy: {compute_accuracy_score(comparisons)}')

Accuracy: 0.4232558139534884

from sklearn.metrics import accuracy_score
print(f'Accuracy: {accuracy_score(comparisons["label"], comparisons["prediction"])}')

Accuracy: 0.4232558139534884

def compute_confusion_matrix(comparisons):
    """ Counts the rows falling in each of the four cells of the confusion matrix

    A row counts as positive when its value is 1 and as negative when it is 0.

    Arguments:
    - comparisons: a dataframe with two columns labelled 'prediction' and 'label'

    Returns:
    - TP, FP, TN and FN (in this order), each as a number of rows
    """

    ### YOUR CODE HERE
    # reminder: offensive = 1 = positive
    TP = int(((comparisons['prediction'] == 1) & (comparisons['label'] == 1)).sum())
    TN = int(((comparisons['prediction'] == 0) & (comparisons['label'] == 0)).sum()) # SOLUTION
    FP = int(((comparisons['prediction'] == 1) & (comparisons['label'] == 0)).sum()) # SOLUTION
    FN = int(((comparisons['prediction'] == 0) & (comparisons['label'] == 1)).sum()) # SOLUTION
    ### END OF YOUR CODE

    return TP,FP,TN,FN

test(compute_confusion_matrix)

🆗 Tests passed ! =)

TP, FP, TN, FN = compute_confusion_matrix(comparisons)
draw_confusion_matrix(TP, FP, TN, FN)

# Retrieve the results from the CSV file and load them into a pandas dataframe
path_tweetsPredictions = os.path.join('res', 'tweetsPredictions.csv')
model_test_results = pd.read_csv(path_tweetsPredictions)
model_test_results.head()

### YOUR CODE HERE
accuracy_score(model_test_results["label"], model_test_results["prediction"]) # SOLUTION

0.8151162790697675

### YOUR CODE HERE
TP,FP,TN,FN = compute_confusion_matrix(model_test_results) # SOLUTION
print(f'TP: {TP}\nFP: {FP}\nTN: {TN}\nFN: {FN}\n') # SOLUTION
draw_confusion_matrix(TP, FP, TN, FN) # SOLUTION

TP: 162
FP: 81
TN: 539
FN: 78

counts = model_test_results["label"].value_counts()
print("Number of non-offensive tweets (labelled 0):", counts[0])
print("Number of offensive tweets (labelled 1):", counts[1])

Number of non-offensive tweets (labelled 0): 620
Number of offensive tweets (labelled 1): 240

def false_positive_rate(TP, FP, TN, FN):
    """ Computes the false positive rate (FPR) from the confusion matrix counts

    Arguments:
    - TP, FP, TN, FN: the four counts of the confusion matrix (only FP and TN are used)

    Returns:
    - FPR: FP divided by (FP + TN)
    """
    ### YOUR CODE HERE
    actual_negatives = FP + TN # SOLUTION
    FPR = FP / actual_negatives # SOLUTION

    return FPR

def false_negative_rate(TP, FP, TN, FN):
    """ Computes the false negative rate (FNR) from the confusion matrix counts

    Arguments:
    - TP, FP, TN, FN: the four counts of the confusion matrix (only FN and TP are used)

    Returns:
    - FNR: FN divided by (FN + TP)
    """
    ### YOUR CODE HERE
    actual_positives = FN + TP # SOLUTION
    FNR = FN / actual_positives # SOLUTION

    return FNR

test(false_positive_rate)

🆗 Tests passed ! =)

test(false_negative_rate)

🆗 Tests passed ! =)

# Load the predictions from the model on the test dataset
comparisons = pd.DataFrame({'prediction': model_test_results["prediction"], 'label': model_test_results["label"]})
TP,FP,TN,FN = compute_confusion_matrix(comparisons)

# Compute the FPR and FNR
FPR = false_positive_rate(TP, FP, TN, FN)
FNR = false_negative_rate(TP, FP, TN, FN)

# Display the results as percentages rounded to the nearest integer
print(f'Proportion of tweets wrongly censored (FPR): {round(FPR*100)}%')
print(f'Proportion of tweets that have wrongly escaped moderation (FNR): {round(FNR*100)}%')

Proportion of tweets wrongly censored (FPR): 13%
Proportion of tweets that have wrongly escaped moderation (FNR): 32%

# Import the libraries we need
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from threadpoolctl import ThreadpoolController, threadpool_limits 
controller = ThreadpoolController()

# Optimization: if a CUDA GPU is available we use it, otherwise we stay on the CPU.
# In both cases we limit the number of torch threads to 2 to avoid losing performance.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_num_threads(2)

# Loading our pre-trained model and moving it to the selected device
model = AutoModelForSequenceClassification.from_pretrained("RS-course/model-safety-W1").to(device)

# Loading a tokenizer to preprocess the text
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')

# Building a pipeline that combines the tokenizer and the model.
# The device is passed explicitly so that the pipeline runs on the same device as the model;
# without it the pipeline warns that no `device` argument was given and puts the model on the CPU.
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)

# Create a function that generates predictions with the model for an array of content texts
@controller.wrap(limits=2, user_api='blas') # limits the number of threads to 2 to avoid performance loss
def predict(data, classifier): 
    """ Runs the classifier on every element of data and tabulates the outcome.

    Arguments:
    - data: the texts to classify (simple list/array, iterable)
    - classifier: classifying pipeline to use; it is called with one text at a time

    Returns:
    - Pandas dataframe with two columns: 'items' holds the texts and 'predictions'
      holds 'Not offensive' when the model answers "LABEL_0" and 'Offensive' otherwise
    """
    # keep the texts in the order in which they were received
    items = list(data)

    # run the model on each text and translate its top label into a readable verdict
    predictions = [
        'Not offensive' if classifier(item)[0]['label'] == "LABEL_0" else 'Offensive'
        for item in items
    ]

    return pd.DataFrame({'items': items, 'predictions': predictions})

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.

tweets = [
    "Just spilled coffee over my new shirt. Great start to the day!",
    "Apparently, the secret to success is waking up at 4 am. Guess I'm doomed.",
    "Spent two hours in traffic today. Absolutely loved it.",
    # Add more examples here...
]

predict(tweets, classifier)

	text
0	Cats rule, dogs drule.
1	Cats are the worst.
2	Dogs are the best pet you could ask for.
3	Dogs are a man's best friend.
4	Cats are smarter than dogs.
5	Cats are the best pet.
6	Cats can jump higher than dogs.
7	Cats are the bee's knees.
8	All dogs go to heaven.
9	Dogs are not cool.

	text
0	cats are the worst!
1	I hate cats
2	Ewwww cats
3	Cats are so cute and amazing
4	Dogs are the worst

	prediction	label
290	1	1
661	1	0
200	1	0
80	1	1
148	1	1

		Predicted value
		Positive (1)	Negative (0)
Actual value i.e. label	Positive (1)	True Positives	False Negatives
Actual value i.e. label	Negative (0)	False Positives	True Negatives

TP = 162	FN = 78
FP = 81	TN = 539

Safety Week 1: Content Moderation¶

Welcome to the Week 1 exercise session of Responsible Software!¶

Part 1: Cats and dogs¶

The scenario¶

Part 1.1 Keyword Matching¶

Task 1: Remove posts with the word "dogs"¶

Task 2: Matching a list of banned words¶

Part 1.2 Sentiment Analysis¶

Task: Detect negative statements¶

Conclusion from Part 1¶

Part 2: Twitter¶

The dataset¶

Part 2.1: Using an opinion lexicon¶

Implementing the filtering system¶

Task 1: Compute the accuracy¶

Task 2: Generate the confusion matrix¶

Part 2.2: Using a deep learning model¶

Task 1: Compute the accuracy and confusion matrix¶

Task 2: Relative proportions of false positives and false negatives¶

Task 3: Use the model to evaluate sample tweets¶

Synthesis¶

Conclusion¶

	text	contains_banned_word
0	Cats rule, dogs drule.	True
1	Cats are the worst.	False
2	Dogs are the best pet you could ask for.	True
3	Dogs are a man's best friend.	True
4	Cats are smarter than dogs.	True
5	Cats are the best pet.	False
6	Cats can jump higher than dogs.	True
7	Cats are the bee's knees.	False
8	All dogs go to heaven.	True
9	Dogs are not cool.	True

	text	contains_banned_word
0	dawgs are the best!	True
1	He's a cool cat.	False
2	Cats > doggos	True
3	Did you know that cats can live 1000 lives ?	False
4	Doggies forever!	True
5	I heard that this kibble is the best one of all.	False
6	Fido had to get his tooth pulled.	False
7	Isn't her doggo the cutest thing?	True
8	Cats are horrible	False

	items	predictions
0	Just spilled coffee over my new shirt. Great start to the day!	Not offensive
1	Apparently, the secret to success is waking up at 4 am. Guess I'm doomed.	Not offensive
2	Spent two hours in traffic today. Absolutely loved it.	Not offensive