import os

import gradio as gr
import openai
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import pipeline

# OpenAI API key is read from the environment (required for the ChatGPT/GPT4 options)
openai.api_key = os.getenv("OPENAI_API_KEY")

# Define the available tasks and Hugging Face models
TASKS = ["sentiment-analysis", "ner", "text-classification"]
MODELS = {
    "DistilBERT": "distilbert-base-uncased",
    "BERT": "bert-base-uncased",
    "RoBERTa": "roberta-base",
    "LLaMA2_7B_chat": "meta-llama/Llama-2-7b-chat-hf",
    "LLaMA2_70B": "meta-llama/Llama-2-70b-hf",
    "ChatGLM3_6B": "THUDM/chatglm3-6b",
    "InternLM_7B": "internlm/internlm-7b",
    "Falcon_7B": "tiiuae/falcon-7b",
    # Add other Hugging Face models here
}

OPENAI_MODELS = ["ChatGPT", "GPT4"]


# Function to load a pipeline for Hugging Face models
def load_pipeline(task, model):
    model_name = MODELS[model]
    return pipeline(task, model=model_name)


# Function to predict using Hugging Face models or OpenAI models
def predict(task, model, text):
    try:
        if model in OPENAI_MODELS:
            # OpenAI API request
            response = openai.ChatCompletion.create(
                model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                messages=[{"role": "user", "content": text}],
            )
            return response['choices'][0]['message']['content']
        else:
            # Hugging Face pipeline (loaded only for models defined in MODELS)
            selected_pipeline = load_pipeline(task, model)
            return selected_pipeline(text)
    except Exception as e:
        print(f"Error in prediction: {e}")
        return {"error": str(e)}


# Function to benchmark Hugging Face models and OpenAI models
def benchmark(task, model, file):
    try:
        data = pd.read_csv(file.name)
        texts = data['query'].tolist()
        true_labels = data['answer'].tolist()
        predictions = []

        if model in OPENAI_MODELS:
            for text in texts:
                response = openai.ChatCompletion.create(
                    model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                    messages=[{"role": "user", "content": text}],
                )
                predictions.append(response['choices'][0]['message']['content'].strip())
        else:
            selected_pipeline = load_pipeline(task, model)
            predictions = [selected_pipeline(text)[0]['label'] for text in texts]

        accuracy = accuracy_score(true_labels, predictions)
        precision, recall, f1, _ = precision_recall_fscore_support(
            true_labels, predictions, average='macro'
        )
        return {
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1,
        }
    except Exception as e:
        print(f"Error in benchmarking: {e}")
        return {"error": str(e)}


# Define the Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        task_input = gr.Dropdown(TASKS, label="Task")
        model_input = gr.Dropdown(list(MODELS.keys()) + OPENAI_MODELS, label="Model")

    with gr.Tab("Predict"):
        with gr.Row():
            text_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Text")
        predict_button = gr.Button("Predict")
        predict_output = gr.JSON(label="Prediction Output")
        predict_button.click(predict, inputs=[task_input, model_input, text_input], outputs=predict_output)

    with gr.Tab("Benchmark"):
        with gr.Row():
            file_input = gr.File(label="Upload CSV for Benchmarking")
        benchmark_button = gr.Button("Benchmark")
        benchmark_output = gr.JSON(label="Benchmark Output")
        benchmark_button.click(benchmark, inputs=[task_input, model_input, file_input], outputs=benchmark_output)

demo.launch()
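
# ---------------------------------------------------------------------------
# Example benchmark CSV (illustrative only). benchmark() expects a `query`
# column with the input texts and an `answer` column with the gold labels;
# the label strings must match whatever the selected pipeline or OpenAI model
# actually returns (e.g. "POSITIVE"/"NEGATIVE" for an SST-2 sentiment model,
# or "LABEL_0"/"LABEL_1" for a checkpoint without a fine-tuned head).
#
#     query,answer
#     "Absolutely loved this film",POSITIVE
#     "The service was terrible",NEGATIVE
# ---------------------------------------------------------------------------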