Spaces:

justus-tobias
/

ASR_Model_Comparison

Paused

App Files Files Community

j-tobias committed on Aug 11, 2024

Commit

1dc0a7f

1 Parent(s): 38c1da3

add front end

Browse files

Files changed (7) hide show

.gitignore +1 -0
README.md +5 -2
app.py +108 -0
createevalset.py +0 -0
dataset.py +36 -0
eval.py +0 -0
model.py +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ credentials.json

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: VocalVenturer
-emoji: 🐠
 colorFrom: purple
-colorTo: yellow
 sdk: gradio
 sdk_version: 4.41.0
 app_file: app.py
@@ -10,3 +10,6 @@ pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: VocalVenturer
+emoji: 💬
 colorFrom: purple
+colorTo: blue
 sdk: gradio
 sdk_version: 4.41.0
 app_file: app.py
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Helpful Resources

app.py ADDED Viewed

	@@ -0,0 +1,108 @@

+import gradio as gr # needs to be installed
+from dataset import Dataset
+# Module-level Dataset instance; none of the handlers or layout code shown in this file reads it yet.
+dataset = Dataset()
+def eval(data_subset:str, model_1:str, model_2:str, model_3:str)->str:
+    result = f"""# HELLO THERE
+    Data Subset: {data_subset}
+    Model 1: {model_1}
+    Model 2: {model_2}
+    Model_3: {model_3}
+    """
+    return result
+def get_card(selected_model:str)->str:
+    if selected_model == "None":
+        return ""
+    elif selected_model == "Model2":
+        return "A very good model indeed"
+    elif selected_model == "Model3":
+        return "Also very good"
+    else:
+        return "Unknown Model"
+def is_own(data_subset:str):
+    if data_subset == "own":
+        own_audio = gr.Audio(sources=['microphone'],streaming=False)
+        own_transcription = gr.TextArea(lines=2)
+        return own_audio, own_transcription
+    own_audio = None
+    own_transcription = None
+    return own_audio, own_transcription
+with gr.Blocks() as app:
+    gr.Markdown("# VocalVenturer 💬")
+    gr.Markdown("-------")
+    gr.Markdown("""
+                Hello there, this is the VocalVenturer, this app is aimed at helping you making more informed model choices for ASR.
+                Please choose a Data Subset to evalutate the Models on. You also have the opportunity to record and transcibe an own sample.
+                The Models will be evaluated using the *WER* metric -> here is an amazing Leaderboard for it LINK""")
+    with gr.Row():
+        with gr.Column(scale=1):
+            pass
+        with gr.Column(scale=3):
+            data_subset = gr.Radio(
+                value="Libris",
+                choices=["Libris","Common","own"],
+                label="Data subset / Own Sample",
+            )
+        with gr.Column(scale=1):
+            pass
+    with gr.Row():
+        own_audio = gr.Audio(sources=['microphone'],streaming=False,visible=False)
+        own_transcription = gr.TextArea(lines=2, visible=False)
+        data_subset.change(is_own, inputs=[data_subset], outputs=[own_audio, own_transcription])
+    with gr.Row():
+        with gr.Column(scale=1):
+            model_1 = gr.Dropdown(
+                choices=["None","Model2","Model3"],
+                label="Select Model 1"
+            )
+            model_1_card = gr.Markdown("")
+        with gr.Column(scale=1):
+            model_2 = gr.Dropdown(
+                choices=["None","Model2","Model3"],
+                label="Select Model 2"
+            )
+            model_2_card = gr.Markdown("")
+        with gr.Column(scale=1):
+            model_3 = gr.Dropdown(
+                choices=["None","Model2","Model3"],
+                label="Select Model 3"
+            )
+            model_3_card = gr.Markdown("")
+        model_1.change(get_card, inputs=model_1, outputs=model_1_card)
+        model_2.change(get_card, inputs=model_2, outputs=model_2_card)
+        model_3.change(get_card, inputs=model_3, outputs=model_3_card)
+    eval_btn = gr.Button(
+        value="Evaluate",
+        variant="primary"
+    )
+    gr.Markdown("-------")
+    gr.Markdown("### Results")
+    results = gr.Markdown("")
+    eval_btn.click(eval, [data_subset, model_1, model_2, model_3], results)
+app.launch(debug=True)

createevalset.py ADDED Viewed

File without changes

dataset.py ADDED Viewed

	@@ -0,0 +1,36 @@

+from huggingface_hub import login
+from datasets import load_dataset
+from datasets import Audio
+import json
+def get_credentials():
+    with open("credentials.json","r") as f:
+        credentials = json.load(f)
+    return credentials['token']
+class Dataset:
+    def __init__(self, n:int=100):
+        # # Log the client into HF
+        # login(token=get_credentials())
+        # # Load the Dataset in Streaming Mode
+        # self.librispeech_clean = load_dataset("librispeech_asr", "all", split="test.clean", streaming=True)
+        # self.librispeech_other = load_dataset("librispeech_asr", "all", split="test.other", streaming=True)
+        self.n = n
+        self.options = ['librisspeech_clean','librisspeech_other']
+    def get_option(self):
+        return self.options
+# Module-level instance, created as soon as this module is imported.
+dataset = Dataset()

eval.py ADDED Viewed

File without changes

model.py ADDED Viewed

File without changes