Nickeik committed on
Commit
8f757dd
·
verified ·
1 Parent(s): 803a4d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -29
app.py CHANGED
@@ -31,7 +31,7 @@ def predict(task, model, text):
31
  if model in ["ChatGPT", "GPT4"]:
32
  # OpenAI API request
33
  response = openai.ChatCompletion.create(
34
- model=MODELS[model],
35
  messages=[{"role": "user", "content": text}]
36
  )
37
  return response['choices'][0]['message']['content']
@@ -43,48 +43,51 @@ def predict(task, model, text):
43
  print(f"Error in prediction: {e}")
44
  return {"error": str(e)}
45
 
46
-
47
  # Function to benchmark Hugging Face models and OpenAI models
48
  def benchmark(task, model, file):
49
- data = pd.read_csv(file.name)
50
- texts = data['query'].tolist()
51
- true_labels = data['answer'].tolist()
52
-
53
- if model in ["ChatGPT", "GPT-4"]:
54
  predictions = []
55
- for text in texts:
56
- response = openai.ChatCompletion.create(
57
- model="gpt-4" if model == "GPT-4" else "gpt-3.5-turbo",
58
- messages=[{"role": "user", "content": text}]
59
- )
60
- predictions.append(response.choices[0].message['content'].strip())
61
- else:
62
- selected_pipeline = load_pipeline(task, model)
63
- predictions = [selected_pipeline(text)[0]['label'] for text in texts]
64
-
65
- accuracy = accuracy_score(true_labels, predictions)
66
- precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predictions, average='macro')
67
-
68
- return {
69
- "Accuracy": accuracy,
70
- "Precision": precision,
71
- "Recall": recall,
72
- "F1 Score": f1
73
- }
 
 
 
 
74
 
75
  # Define the Gradio interface
76
  with gr.Blocks() as demo:
77
  with gr.Row():
78
  task_input = gr.Dropdown(TASKS, label="Task")
79
- model_input = gr.Dropdown(list(MODELS.keys()) + ["ChatGPT", "GPT-4"], label="Model")
80
-
81
  with gr.Tab("Predict"):
82
  with gr.Row():
83
  text_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Text")
84
  predict_button = gr.Button("Predict")
85
  predict_output = gr.JSON(label="Prediction Output")
86
  predict_button.click(predict, inputs=[task_input, model_input, text_input], outputs=predict_output)
87
-
88
  with gr.Tab("Benchmark"):
89
  with gr.Row():
90
  file_input = gr.File(label="Upload CSV for Benchmarking")
 
31
  if model in ["ChatGPT", "GPT4"]:
32
  # OpenAI API request
33
  response = openai.ChatCompletion.create(
34
+ model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
35
  messages=[{"role": "user", "content": text}]
36
  )
37
  return response['choices'][0]['message']['content']
 
43
  print(f"Error in prediction: {e}")
44
  return {"error": str(e)}
45
 
 
46
  # Function to benchmark Hugging Face models and OpenAI models
47
def benchmark(task, model, file):
    """Benchmark a model on a labelled CSV and report classification metrics.

    The CSV (uploaded via Gradio) must contain a 'query' column (inputs) and an
    'answer' column (gold labels). OpenAI chat models are queried one text at a
    time; any other model name is resolved through load_pipeline.
    Returns a dict with Accuracy, Precision, Recall and F1 Score (macro
    average), or {"error": ...} if anything fails — mirroring predict's
    error contract so the Gradio JSON output always gets a dict.
    """
    try:
        frame = pd.read_csv(file.name)
        queries = frame['query'].tolist()
        gold = frame['answer'].tolist()

        outputs = []
        if model in ("ChatGPT", "GPT4"):
            for query in queries:
                # One API round-trip per row; the raw chat reply is used as the label.
                reply = openai.ChatCompletion.create(
                    model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                    messages=[{"role": "user", "content": query}],
                )
                outputs.append(reply['choices'][0]['message']['content'].strip())
        else:
            pipe = load_pipeline(task, model)
            outputs = [pipe(query)[0]['label'] for query in queries]

        accuracy = accuracy_score(gold, outputs)
        precision, recall, f1, _ = precision_recall_fscore_support(gold, outputs, average='macro')

        return {
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1
        }
    except Exception as e:
        # Surface the failure to the UI rather than crashing the app.
        print(f"Error in benchmarking: {e}")
        return {"error": str(e)}
77
 
78
  # Define the Gradio interface
79
  with gr.Blocks() as demo:
80
  with gr.Row():
81
  task_input = gr.Dropdown(TASKS, label="Task")
82
+ model_input = gr.Dropdown(list(MODELS.keys()) + ["ChatGPT", "GPT4"], label="Model")
83
+
84
  with gr.Tab("Predict"):
85
  with gr.Row():
86
  text_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Text")
87
  predict_button = gr.Button("Predict")
88
  predict_output = gr.JSON(label="Prediction Output")
89
  predict_button.click(predict, inputs=[task_input, model_input, text_input], outputs=predict_output)
90
+
91
  with gr.Tab("Benchmark"):
92
  with gr.Row():
93
  file_input = gr.File(label="Upload CSV for Benchmarking")