Niklauseik committed
Commit 0123138 · 1 Parent(s): f90ea8d

llm
app.py
CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 import pandas as pd
 from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+import openai
+import requests
 
 # Define the available models and tasks
 TASKS = ["sentiment-analysis", "ner", "text-classification"]
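The new `openai` import is used later in the diff through the legacy pre-1.0 `openai.Completion` interface, which was removed in `openai>=1.0`; `requests` is added here but never referenced in the hunks shown. A minimal guard against running on an incompatible SDK, assuming the Space is meant to pin the old package (the check itself is not part of the commit):

```python
# Sketch (assumption): abort early if an openai SDK >= 1.0 is installed,
# since the openai.Completion.create() calls below were removed in 1.0.
from importlib.metadata import version

if int(version("openai").split(".")[0]) >= 1:
    raise RuntimeError(
        "app.py uses the legacy openai.Completion API; "
        "install a pre-1.0 SDK, e.g. pip install 'openai<1'."
    )
```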
@@ -9,26 +11,54 @@ MODELS = {
     "DistilBERT": "distilbert-base-uncased",
     "BERT": "bert-base-uncased",
     "RoBERTa": "roberta-base",
-    "
-
+    "LLaMA2_7B_chat": "meta-llama/Llama-2-7b-chat-hf",
+    "LLaMA2_70B": "meta-llama/Llama-2-70b-hf",
+    "ChatGLM3_6B": "THUDM/chatglm-6b",
+    "InternLM_7B": "internlm/internlm-7b",
+    "Falcon_7B": "tiiuae/falcon-7b",
+    # Add other Hugging Face models here
 }
 
+# Set up OpenAI API key
+openai.api_key = "YOUR_OPENAI_API_KEY"
+
+# Function to load pipeline for Hugging Face models
 def load_pipeline(task, model):
     model_name = MODELS[model]
     return pipeline(task, model=model_name)
 
+# Function to predict using Hugging Face models
 def predict(task, model, text):
-    selected_pipeline = load_pipeline(task, model)
-    results = selected_pipeline(text)
+    if model.startswith("ChatGPT"):
+        response = openai.Completion.create(
+            engine="text-davinci-003",
+            prompt=text,
+            max_tokens=50
+        )
+        results = [{"label": response.choices[0].text.strip()}]
+    else:
+        selected_pipeline = load_pipeline(task, model)
+        results = selected_pipeline(text)
     return results
 
+# Function to benchmark Hugging Face models
 def benchmark(task, model, file):
     data = pd.read_csv(file.name)
-    texts = data['
-    true_labels = data['
+    texts = data['query'].tolist()
+    true_labels = data['answer'].tolist()
 
-    selected_pipeline = load_pipeline(task, model)
-    predictions = [selected_pipeline(text)[0]['label'] for text in texts]
+    if model.startswith("ChatGPT"):
+        predictions = []
+        for text in texts:
+            response = openai.Completion.create(
+                engine="text-davinci-003",
+                prompt=text,
+                max_tokens=50
+            )
+            predictions.append(response.choices[0].text.strip())
+    else:
+        selected_pipeline = load_pipeline(task, model)
+        predictions = [selected_pipeline(text)[0]['label'] for text in texts]
 
     accuracy = accuracy_score(true_labels, predictions)
     precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='macro')
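Hardcoding the key as `openai.api_key = "YOUR_OPENAI_API_KEY"` in a public Space exposes it to anyone who can read the repo. A safer sketch, assuming the key is supplied through an `OPENAI_API_KEY` environment variable (for example a Space secret; the variable name is an assumption):

```python
# Sketch: read the OpenAI key from the environment instead of hardcoding it.
# OPENAI_API_KEY is an assumed name, e.g. configured as a Space secret.
import os
import openai

openai.api_key = os.environ.get("OPENAI_API_KEY", "")
if not openai.api_key:
    raise RuntimeError("OPENAI_API_KEY is not set")
```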
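Note that `text-davinci-003` has since been retired, so these `openai.Completion.create` calls will fail against current endpoints. A rough equivalent on the `openai>=1.0` chat API might look like the sketch below; the replacement model name and single-turn prompt shape are assumptions, not part of the commit:

```python
# Sketch (openai>=1.0): chat-completions equivalent of the legacy call above.
from openai import OpenAI

client = OpenAI()  # picks up OPENAI_API_KEY from the environment

def chatgpt_predict(text: str) -> str:
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",  # assumed stand-in for the retired text-davinci-003
        messages=[{"role": "user", "content": text}],
        max_tokens=50,
    )
    return response.choices[0].message.content.strip()
```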
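One caveat with the new benchmark path: `accuracy_score` compares strings exactly, so free-form completions such as "Positive." will rarely match the gold `answer` values, and pipeline outputs like "LABEL_1" will not match human-readable answers either. A minimal normalization sketch (the rules are assumptions to adapt to the dataset):

```python
# Sketch: normalize both sides before scoring so case, whitespace and
# trailing-punctuation differences are not counted as errors.
def normalize(label: str) -> str:
    return label.strip().strip(".").lower()

# Intended to run in benchmark() just before accuracy_score:
# predictions = [normalize(p) for p in predictions]
# true_labels = [normalize(t) for t in true_labels]
```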
@@ -44,7 +74,7 @@ def benchmark(task, model, file):
 with gr.Blocks() as demo:
     with gr.Row():
         task_input = gr.Dropdown(TASKS, label="Task")
-        model_input = gr.Dropdown(list(MODELS.keys()), label="Model")
+        model_input = gr.Dropdown(list(MODELS.keys()) + ["ChatGPT"], label="Model")
 
     with gr.Tab("Predict"):
         with gr.Row():
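For reference, `benchmark` now expects the uploaded CSV to contain `query` and `answer` columns. A sketch of how such a file could be produced (rows invented for illustration; the label strings depend on the selected model's output):

```python
# Sketch: build an example benchmark CSV with the columns benchmark() reads.
# Rows are invented for illustration; labels depend on the chosen model.
import pandas as pd

pd.DataFrame(
    {
        "query": ["I loved this movie!", "Worst purchase I ever made."],
        "answer": ["POSITIVE", "NEGATIVE"],
    }
).to_csv("benchmark_example.csv", index=False)
```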