import os

import gradio as gr
import openai
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import pipeline

# The openai<1.0 ChatCompletion client reads OPENAI_API_KEY from the
# environment by default; setting it explicitly makes the dependency visible.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Define the available models and tasks
TASKS = ["sentiment-analysis", "ner", "text-classification"]
MODELS = {
    "DistilBERT": "distilbert-base-uncased",
    "BERT": "bert-base-uncased",
    "RoBERTa": "roberta-base",
    "LLaMA2_7B_chat": "meta-llama/Llama-2-7b-chat-hf",
    "LLaMA2_70B": "meta-llama/Llama-2-70b-hf",
    "ChatGLM3_6B": "THUDM/chatglm-6b",
    "InternLM_7B": "internlm/internlm-7b",
    "Falcon_7B": "tiiuae/falcon-7b"
    # Add other Hugging Face models here
}
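
# Note: the meta-llama checkpoints are gated on the Hugging Face Hub; loading
# them requires accepting the license and authenticating (e.g. via an HF_TOKEN
# environment variable). The 70B variant also needs far more memory than the
# 7B models, so it may not load in a typical demo environment.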

# Function to load pipeline for Hugging Face models
def load_pipeline(task, model):
    model_name = MODELS[model]
    return pipeline(task, model=model_name)
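
# pipeline() re-initializes model weights on every call, which is slow for the
# larger checkpoints. A minimal memoized variant (a sketch; assumes the chosen
# models fit in memory together and that task/model are plain strings):
from functools import lru_cache

@lru_cache(maxsize=4)
def load_pipeline_cached(task, model):
    return pipeline(task, model=MODELS[model])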

# Run prediction with either a Hugging Face pipeline or an OpenAI chat model
def predict(task, model, text):
    try:
        if model in ["ChatGPT", "GPT4"]:
            # OpenAI API request (openai<1.0 ChatCompletion interface)
            response = openai.ChatCompletion.create(
                model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                messages=[{"role": "user", "content": text}]
            )
            return response['choices'][0]['message']['content']
        # Hugging Face pipeline; only loaded for models present in MODELS, so
        # the OpenAI model names never reach MODELS[model] and raise a KeyError
        selected_pipeline = load_pipeline(task, model)
        return selected_pipeline(text)
    except Exception as e:
        print(f"Error in prediction: {e}")
        return {"error": str(e)}

# Function to benchmark Hugging Face models and OpenAI models
def benchmark(task, model, file):
    try:
        # gr.File may hand the function a filepath string or a tempfile
        # wrapper depending on the Gradio version; handle both.
        data = pd.read_csv(getattr(file, "name", file))
        texts = data['query'].tolist()
        true_labels = data['answer'].tolist()

        predictions = []
        if model in ["ChatGPT", "GPT4"]:
            for text in texts:
                response = openai.ChatCompletion.create(
                    model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                    messages=[{"role": "user", "content": text}]
                )
                predictions.append(response['choices'][0]['message']['content'].strip())
        else:
            # Assumes a classification-style pipeline whose outputs carry a
            # 'label' key; token-level tasks like "ner" need different parsing
            selected_pipeline = load_pipeline(task, model)
            predictions = [selected_pipeline(text)[0]['label'] for text in texts]

        accuracy = accuracy_score(true_labels, predictions)
        precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='macro')

        return {
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1
        }
    except Exception as e:
        print(f"Error in benchmarking: {e}")
        return {"error": str(e)}

# Define the Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        task_input = gr.Dropdown(TASKS, label="Task")
        model_input = gr.Dropdown(list(MODELS.keys()) + ["ChatGPT", "GPT4"], label="Model")

    with gr.Tab("Predict"):
        with gr.Row():
            text_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Text")
            predict_button = gr.Button("Predict")
        predict_output = gr.JSON(label="Prediction Output")
        predict_button.click(predict, inputs=[task_input, model_input, text_input], outputs=predict_output)

    with gr.Tab("Benchmark"):
        with gr.Row():
            file_input = gr.File(label="Upload CSV for Benchmarking")
            benchmark_button = gr.Button("Benchmark")
        benchmark_output = gr.JSON(label="Benchmark Output")
        benchmark_button.click(benchmark, inputs=[task_input, model_input, file_input], outputs=benchmark_output)

demo.launch()