import json
import random
import sys

import numpy as np
import pandas as pd
import streamlit as st

# from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline

title = "Model Exploration"
description = "Comparison of hate speech detection models"
date = "2022-01-26"
thumbnail = "images/robot.png"
__HATE_DETECTION = """
Once the data has been collected using the definitions identified for the
task, you can start training your model. At training time, the model takes
in the labeled data and learns the context associated with each label in
the input. Depending on the task design, the labels may be binary, like
'hateful' and 'non-hateful', or multiclass, like 'neutral', 'offensive',
and 'attack'.

When presented with a new input string, the model predicts the likelihood
that the input belongs to each of the available labels and returns the
label with the highest likelihood, along with a score from 0 to 1
indicating how confident the model is in its selection.
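
For example, here is a minimal sketch of that prediction step using the
`transformers` pipeline API (the model is one of those listed below; the
exact label names and scores vary by model):

```python
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="facebook/roberta-hate-speech-dynabench-r4-target",
)
print(classifier("I hate everyone in that group."))
# -> [{'label': ..., 'score': ...}]  (highest-likelihood label and its score)
```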

Neural models such as transformers are frequently trained as general
language models and then fine-tuned on specific classification tasks.
These models can vary in their architectures and optimization algorithms,
sometimes producing very different outputs for the same input text.

The models used below include:

- [RoBERTa trained on FRENK dataset](https://huggingface.co/classla/roberta-base-frenk-hate)
- [RoBERTa trained on Twitter Hate Speech](https://huggingface.co/cardiffnlp/twitter-roberta-base-hate)
- [DeHateBERT model (trained on Twitter and StormFront)](https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-english)
- [RoBERTa trained on 11 English hate speech datasets](https://huggingface.co/facebook/roberta-hate-speech-dynabench-r1-target)
- [RoBERTa trained on 11 English hate speech datasets and Round 1 of the Dynamically Generated Hate Speech Dataset](https://huggingface.co/facebook/roberta-hate-speech-dynabench-r2-target)
- [RoBERTa trained on 11 English hate speech datasets and Rounds 1 and 2 of the Dynamically Generated Hate Speech Dataset](https://huggingface.co/facebook/roberta-hate-speech-dynabench-r3-target)
- [RoBERTa trained on 11 English hate speech datasets and Rounds 1, 2, and 3 of the Dynamically Generated Hate Speech Dataset](https://huggingface.co/facebook/roberta-hate-speech-dynabench-r4-target)
"""

__HATECHECK = """
[Röttger et al. (2021)](https://aclanthology.org/2021.acl-long.4.pdf)
developed HateCheck, a suite of 3,901 test cases for hate speech detection
models. HateCheck provides a number of templates with placeholders for
identity categories and hateful terms, along with labels indicating whether
a model should or should not categorize an instance as hate speech. For
each case, they created several examples with different identity attributes
to test models' abilities to detect hate speech towards a range of groups
of people. They also used more difficult linguistic contexts, such as
negation or more nuanced wording, to try to fool the model. See some of
their examples using the button, or try making your own examples to test
the models in the tools below.
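
For example, a HateCheck-style template pairs a placeholder with an
expected label, along these lines (the placeholder syntax here is
illustrative, not the exact HateCheck format):

```python
template = "I hate [IDENTITY]."  # expected label: hateful
examples = ["I hate women.", "I hate immigrants."]
```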

*** Warning: these examples may include hateful and violent content as
well as slurs and other offensive language ***
"""

__RANKING = """
When a model processes a given input, it calculates the probability of
that input being assigned each of the possible labels (in a binary case,
for example, either 'hateful' or 'not hateful'). The label with the
highest probability is returned. If we test multiple input sentences for
a given model, we can see which inputs receive the highest probabilities,
indicating which examples the model classifies most confidently.
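
For example, a minimal sketch of retrieving the scores for every label
(`return_all_scores=True` matches what the tool below does; the scores
shown are whatever the model assigns):

```python
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-hate",
    return_all_scores=True,
)
results = classifier(["Good morning.", "I hate you."])
# -> one list of {'label': ..., 'score': ...} dicts per input sentence
```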

Try comparing different input sentences for a given model using the tool
below.
"""

__COMPARISON = """
Depending on their training data and parameters, models can return very
different outputs for the same input. Knowing how models differ in their
behavior can help with choosing an appropriate model for your given use
case.
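
For instance, the comparison tool below runs the same input through two
pipelines, along these lines (model names are from the list above; labels
and scores vary by model):

```python
from transformers import pipeline

text = "We walked along the dyke down the road."
for name in ["classla/roberta-base-frenk-hate",
             "facebook/roberta-hate-speech-dynabench-r4-target"]:
    print(name, pipeline("text-classification", model=name)(text))
```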

Additionally, models trained on one kind of data can perform very
differently when tested on novel data. To show the models' performance
in a variety of settings, we also display each model's results on four
datasets: the entirety of the [HateCheck Dataset](https://github.com/paul-rottger/hatecheck-data),
the [Dynamically Generated Hate Speech Dataset](https://github.com/bvidgen/Dynamically-Generated-Hate-Speech-Dataset),
the [Hateful Memes Dataset](https://huggingface.co/datasets/limjiayi/hateful_memes_expanded)
(without the images), and a 2,000-instance subset of the
[Open Subtitles](https://opus.nlpl.eu/OpenSubtitles-v2018.php)
(English) Dataset. While the first three are hate speech datasets with
labels for whether or not each instance is hateful, the Open Subtitles
dataset contains movie subtitles and is not labeled for instances of hate.

Try comparing two different models' output for the same comparison
sentence using the tool below.
"""

__MODELS = [
    "classla/roberta-base-frenk-hate",
    "cardiffnlp/twitter-roberta-base-hate",
    "Hate-speech-CNERG/dehatebert-mono-english",
    "facebook/roberta-hate-speech-dynabench-r1-target",
    "facebook/roberta-hate-speech-dynabench-r2-target",
    "facebook/roberta-hate-speech-dynabench-r3-target",
    "facebook/roberta-hate-speech-dynabench-r4-target",
]

autopop_ranking = [
    [
        "I don't hate black people.",
        "Black people should die.",
        "My friend likes pancakes.",
    ],
    [
        "Those dirty redskins are all drunks.",
        "I can't stand Chinese food. It's too spicy.",
        "I lived near many Chinese people and I hated every single one of them.",
    ],
    [
        "Stop saying that Mexicans don't belong in our country!",
        "We walked along the dyke down the road.",
        "Ah shit, I fucked up.",
    ],
]


# Creates the forms for receiving multiple inputs to compare for a single
# model or one input to compare for two models
def run_article():
    st.markdown("# Making a Hate Speech Detection Model")
    with st.expander("Hate Speech Detection Models", expanded=False):
        st.markdown(__HATE_DETECTION)
    hc_path = "posts/resources/"
    hc_pholders = json.load(
        open(hc_path + "template_placeholders.json", encoding="utf-8")
    )
    hc_templates = json.load(
        open(hc_path + "hatecheck_category_templates.json", encoding="utf-8")
    )
    hc_info = json.load(
        open(hc_path + "hatecheck_category_info.json", encoding="utf-8")
    )
    hc_cats = [""] + list(hc_info.keys())

    st.markdown("## Testing Models' Behavior")
    with st.expander("HateCheck Examples", expanded=False):
        st.markdown(__HATECHECK)
        category = st.selectbox(
            "Select a category of examples from HateCheck", hc_cats, key="hc_cat_select"
        )
        if category:
            with st.form(key="hate_check"):
                hc_cat = hc_info[category]
                templates = []
                names = []
                for hc_temp in hc_cat:
                    templates.append(hc_temp)
                    names.append(hc_cat[hc_temp]["name"])
                selected_names = st.multiselect(
                    "Select one or more HateCheck templates to generate examples for",
                    names,
                    key="hc_temp_multiselect",
                )
                num_exs = st.number_input(
                    "Select a number of examples to generate for each selected template",
                    min_value=1,
                    max_value=5,
                    value=3,
                )
                if st.form_submit_button(label="Generate Examples"):
                    for name in selected_names:
                        index = names.index(name)
                        template = templates[index]
                        examples = generate_hc_ex(
                            hc_templates[template], hc_pholders, num_exs
                        )
                        st.header(name)
                        st.subheader("Label: " + hc_cat[template]["value"])
                        st.caption(hc_cat[template]["desc"])
                        for num in range(num_exs):
                            ex = examples[num]
                            st.write("Example #" + str(num + 1) + ": " + ex)
| st.markdown("## Model Output Ranking") | |
| with st.expander("Model Output Ranking Tool", expanded=False): | |
| st.markdown(__RANKING) | |
| with st.form(key="ranking"): | |
| model_name = st.selectbox( | |
| "Select a model to test", | |
| __MODELS, | |
| ) | |
| # the placeholder key functionality was added in v1.2 of streamlit | |
| # and versions on Spaces currently goes up to v1.0 | |
            input_1 = st.text_input(
                "Input 1",
                help="Try a phrase like 'We shouldn't let [IDENTITY] suffer.'",
                # placeholder="We shouldn't let [IDENTITY] suffer."
            )
            input_2 = st.text_input(
                "Input 2",
                help="Try a phrase like 'I'd rather die than date [IDENTITY].'",
                # placeholder="I'd rather die than date [IDENTITY]."
            )
            input_3 = st.text_input(
                "Input 3",
                help="Try a phrase like 'Good morning'",
                # placeholder="Good morning."
            )
            autopop = st.checkbox(
                "Choose examples for me",
                key="rank_autopop_ckbx",
                help="Check this box to run the model with 3 preselected sentences.",
            )
            if st.form_submit_button(label="Rank inputs"):
                if autopop:
                    rank_inputs = random.choice(autopop_ranking)
                else:
                    rank_inputs = [input_1, input_2, input_3]
                sys.stderr.write("\n" + str(rank_inputs) + "\n")
                results = run_ranked(model_name, rank_inputs)
                st.dataframe(results)
| st.markdown("## Model Comparison") | |
| with st.expander("Model Comparison Tool", expanded=False): | |
| st.markdown(__COMPARISON) | |
| with st.form(key="comparison"): | |
| model_name_1 = st.selectbox( | |
| "Select a model to compare", | |
| __MODELS, | |
| key="compare_model_1", | |
| ) | |
| model_name_2 = st.selectbox( | |
| "Select another model to compare", | |
| __MODELS, | |
| key="compare_model_2", | |
| ) | |
| autopop = st.checkbox( | |
| "Choose an example for me", | |
| key="comp_autopop_ckbx", | |
| help="Check this box to compare the models with a preselected sentence.", | |
| ) | |
| input_text = st.text_input("Comparison input") | |
| if st.form_submit_button(label="Compare models"): | |
| if autopop: | |
| input_text = random.choice(random.choice(autopop_ranking)) | |
| results = run_compare(model_name_1, model_name_2, input_text) | |
| st.write("### Showing results for: " + input_text) | |
| st.dataframe(results) | |
| outside_ds = ["hatecheck", "dynabench", "hatefulmemes", "opensubtitles"] | |
| name_1_short = model_name_1.split("/")[1] | |
| name_2_short = model_name_2.split("/")[1] | |
| for calib_ds in outside_ds: | |
| ds_loc = "posts/resources/charts/" + calib_ds + "/" | |
| images, captions = [], [] | |
| for model in [name_1_short, name_2_short]: | |
| images.append(ds_loc + model + "_" + calib_ds + ".png") | |
| captions.append("Counts of dataset instances by hate score.") | |
| st.write("#### Model performance comparison on " + calib_ds) | |
| st.image(images, captions) | |
                # if model_name_1 == "Hate-speech-CNERG/dehatebert-mono-english":
                #     st.image("posts/resources/dehatebert-mono-english_calibration.png")
                # elif model_name_1 == "cardiffnlp/twitter-roberta-base-hate":
                #     st.image("posts/resources/twitter-roberta-base-hate_calibration.png")
                # st.write("Calibration of Model 2")
                # if model_name_2 == "Hate-speech-CNERG/dehatebert-mono-english":
                #     st.image("posts/resources/dehatebert-mono-english_calibration.png")
                # elif model_name_2 == "cardiffnlp/twitter-roberta-base-hate":
                #     st.image("posts/resources/twitter-roberta-base-hate_calibration.png")


# Takes in a HateCheck template and placeholders and generates the given
# number of random examples from the template, inserting a random instance of
# an identity category if there is a placeholder in the template
def generate_hc_ex(template, placeholders, gen_num):
    sampled = random.sample(template, gen_num)
    ph_cats = list(placeholders.keys())
    for index in range(len(sampled)):
        sample = sampled[index]
        for ph_cat in ph_cats:
            if ph_cat in sample:
                insert = random.choice(placeholders[ph_cat])
                sampled[index] = sample.replace(ph_cat, insert).capitalize()
    return sampled
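
# Usage sketch with made-up inputs (the real template and placeholder data
# come from the HateCheck JSON resources loaded in run_article; "[IDENTITY]"
# is an illustrative placeholder key, not necessarily the exact HateCheck
# syntax):
#   generate_hc_ex(
#       ["I hate [IDENTITY].", "[IDENTITY] don't belong here."],
#       {"[IDENTITY]": ["women", "immigrants"]},
#       gen_num=2,
#   )
#   -> e.g. ["I hate women.", "Immigrants don't belong here."]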


# Runs the received input strings through the given model and returns
# all scores for all possible labels as a DataFrame
def run_ranked(model, input_list):
    classifier = pipeline("text-classification", model=model, return_all_scores=True)
    output = {}
    results = classifier(input_list)
    for result in results:
        for entry in result:
            label = entry["label"]
            score = entry["score"]
            if label in output:
                output[label].append(score)
            else:
                output[label] = [score]
    return pd.DataFrame(output, index=input_list)
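
# Usage sketch (any model from __MODELS works; the returned scores are
# whatever the model assigns):
#   run_ranked(__MODELS[1], ["Good morning.", "I hate you."])
#   -> DataFrame with one row per input sentence and one column per label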


# Takes in two model names and returns the output of both models for the
# given input string
def run_compare(name_1, name_2, text):
    classifier_1 = pipeline("text-classification", model=name_1)
    result_1 = classifier_1(text)
    out_1 = {
        "Model": name_1,
        "Label": result_1[0]["label"],
        "Score": result_1[0]["score"],
    }
    classifier_2 = pipeline("text-classification", model=name_2)
    result_2 = classifier_2(text)
    out_2 = {
        "Model": name_2,
        "Label": result_2[0]["label"],
        "Score": result_2[0]["score"],
    }
    return [out_1, out_2]
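
# Usage sketch (label names depend on each model's label set):
#   run_compare(__MODELS[0], __MODELS[2], "Good morning.")
#   -> [{"Model": ..., "Label": ..., "Score": ...}, {"Model": ..., ...}]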