import os

import gradio as gr
import openai
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import pipeline

# OpenAI API key is read from the environment (required for the ChatGPT/GPT4 options)
openai.api_key = os.getenv("OPENAI_API_KEY")

# Define the available tasks and Hugging Face models
TASKS = ["sentiment-analysis", "ner", "text-classification"]
MODELS = {
    "DistilBERT": "distilbert-base-uncased",
    "BERT": "bert-base-uncased",
    "RoBERTa": "roberta-base",
    "LLaMA2_7B_chat": "meta-llama/Llama-2-7b-chat-hf",
    "LLaMA2_70B": "meta-llama/Llama-2-70b-hf",
    "ChatGLM3_6B": "THUDM/chatglm3-6b",
    "InternLM_7B": "internlm/internlm-7b",
    "Falcon_7B": "tiiuae/falcon-7b",
    # Add other Hugging Face models here
}

OPENAI_MODELS = ["ChatGPT", "GPT4"]


# Function to load a pipeline for Hugging Face models
def load_pipeline(task, model):
    model_name = MODELS[model]
    return pipeline(task, model=model_name)


# Function to predict using Hugging Face models or OpenAI models
def predict(task, model, text):
    try:
        if model in OPENAI_MODELS:
            # OpenAI API request
            response = openai.ChatCompletion.create(
                model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                messages=[{"role": "user", "content": text}],
            )
            return response['choices'][0]['message']['content']
        else:
            # Hugging Face pipeline (loaded only for models defined in MODELS)
            selected_pipeline = load_pipeline(task, model)
            return selected_pipeline(text)
    except Exception as e:
        print(f"Error in prediction: {e}")
        return {"error": str(e)}


# Function to benchmark Hugging Face models and OpenAI models
def benchmark(task, model, file):
    try:
        data = pd.read_csv(file.name)
        texts = data['query'].tolist()
        true_labels = data['answer'].tolist()
        predictions = []

        if model in OPENAI_MODELS:
            for text in texts:
                response = openai.ChatCompletion.create(
                    model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                    messages=[{"role": "user", "content": text}],
                )
                predictions.append(response['choices'][0]['message']['content'].strip())
        else:
            selected_pipeline = load_pipeline(task, model)
            predictions = [selected_pipeline(text)[0]['label'] for text in texts]

        accuracy = accuracy_score(true_labels, predictions)
        precision, recall, f1, _ = precision_recall_fscore_support(
            true_labels, predictions, average='macro'
        )
        return {
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1,
        }
    except Exception as e:
        print(f"Error in benchmarking: {e}")
        return {"error": str(e)}


# Define the Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        task_input = gr.Dropdown(TASKS, label="Task")
        model_input = gr.Dropdown(list(MODELS.keys()) + OPENAI_MODELS, label="Model")

    with gr.Tab("Predict"):
        with gr.Row():
            text_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Text")
        predict_button = gr.Button("Predict")
        predict_output = gr.JSON(label="Prediction Output")
        predict_button.click(predict, inputs=[task_input, model_input, text_input], outputs=predict_output)

    with gr.Tab("Benchmark"):
        with gr.Row():
            file_input = gr.File(label="Upload CSV for Benchmarking")
        benchmark_button = gr.Button("Benchmark")
        benchmark_output = gr.JSON(label="Benchmark Output")
        benchmark_button.click(benchmark, inputs=[task_input, model_input, file_input], outputs=benchmark_output)

demo.launch()
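
# ---------------------------------------------------------------------------
# Example benchmark CSV (illustrative only). benchmark() expects a `query`
# column with the input texts and an `answer` column with the gold labels;
# the label strings must match whatever the selected pipeline or OpenAI model
# actually returns (e.g. "POSITIVE"/"NEGATIVE" for an SST-2 sentiment model,
# or "LABEL_0"/"LABEL_1" for a checkpoint without a fine-tuned head).
#
#     query,answer
#     "Absolutely loved this film",POSITIVE
#     "The service was terrible",NEGATIVE
# ---------------------------------------------------------------------------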