j-tobias
committed on
Commit
·
1dc0a7f
1
Parent(s):
38c1da3
add front end
Browse files- .gitignore +1 -0
- README.md +5 -2
- app.py +108 -0
- createevalset.py +0 -0
- dataset.py +36 -0
- eval.py +0 -0
- model.py +0 -0
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
credentials.json
|
README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
---
|
| 2 |
title: VocalVenturer
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: purple
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 4.41.0
|
| 8 |
app_file: app.py
|
|
@@ -10,3 +10,6 @@ pinned: false
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: VocalVenturer
|
| 3 |
+
emoji: 💬
|
| 4 |
colorFrom: purple
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 4.41.0
|
| 8 |
app_file: app.py
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Helpful Resources
|
app.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr # needs to be installed
|
| 2 |
+
|
| 3 |
+
from dataset import Dataset
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# Module-level Dataset instance, constructed with its default arguments when this script starts.
dataset = Dataset()
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def eval(data_subset:str, model_1:str, model_2:str, model_3:str)->str:
    """Build the Markdown summary of the current selection.

    NOTE: the function name shadows the built-in ``eval``. It is kept as is
    because callers refer to the function by this name.

    Args:
        data_subset: The selected data subset.
        model_1: The selection for the first model slot.
        model_2: The selection for the second model slot.
        model_3: The selection for the third model slot.

    Returns:
        Markdown text: a heading followed by one bullet per selection.
    """
    entries = (
        ("Data Subset", data_subset),
        ("Model 1", model_1),
        ("Model 2", model_2),
        ("Model 3", model_3),  # was labelled "Model_3", unlike the other slots
    )

    # One bullet per entry: plain consecutive lines are soft line breaks in
    # Markdown and would be rendered as a single run-on paragraph.
    lines = ["# HELLO THERE", ""]
    lines.extend(f"- {label}: {value}" for label, value in entries)
    return "\n".join(lines) + "\n"
|
| 18 |
+
|
| 19 |
+
def get_card(selected_model:str)->str:
    """Return the description text for the selected model.

    Args:
        selected_model: The model name to look up.

    Returns:
        The text registered for that name, or ``"Unknown Model"`` when the
        name matches none of the known entries.
    """
    known_cards = (
        ("None", ""),
        ("Model2", "A very good model indeed"),
        ("Model3", "Also very good"),
    )

    # Compare with == in declaration order, first match wins.
    for model_name, card_text in known_cards:
        if selected_model == model_name:
            return card_text

    return "Unknown Model"
|
| 29 |
+
|
| 30 |
+
def is_own(data_subset:str):
    """Show or hide the "own sample" widgets for the chosen data subset.

    The returned components act as updates for the two output components of
    the event this function is bound to. Gradio applies only the arguments
    that are passed explicitly, so ``visible`` has to be set in both
    branches; otherwise widgets that start hidden stay hidden.

    Args:
        data_subset: The currently selected data subset.

    Returns:
        A pair ``(audio_update, transcription_update)``.
    """
    if data_subset == "own":
        own_audio = gr.Audio(sources=['microphone'], streaming=False, visible=True)
        own_transcription = gr.TextArea(lines=2, visible=True)
        return own_audio, own_transcription

    # Any other subset: hide the widgets again and clear whatever was
    # recorded or typed (the value is reset to None).
    own_audio = gr.Audio(value=None, visible=False)
    own_transcription = gr.TextArea(value=None, visible=False)
    return own_audio, own_transcription
|
| 38 |
+
|
| 39 |
+
# Front-end layout and event wiring; the app is launched at the bottom.
with gr.Blocks() as app:

    gr.Markdown("# VocalVenturer 💬")
    gr.Markdown("-------")
    # TODO: replace the "LINK" placeholder below with the leaderboard URL.
    gr.Markdown("""
    Hello there, this is the VocalVenturer, this app is aimed at helping you make more informed model choices for ASR.
    Please choose a Data Subset to evaluate the Models on. You also have the opportunity to record and transcribe your own sample.
    The Models will be evaluated using the *WER* metric -> here is an amazing Leaderboard for it LINK""")

    # Data subset selector, centred between two empty spacer columns.
    with gr.Row():
        with gr.Column(scale=1):
            pass
        with gr.Column(scale=3):
            data_subset = gr.Radio(
                value="Libris",
                choices=["Libris","Common","own"],
                label="Data subset / Own Sample",
            )
        with gr.Column(scale=1):
            pass

    # Recording and transcription inputs start hidden; is_own() is called
    # whenever the subset selection changes.
    with gr.Row():
        own_audio = gr.Audio(sources=['microphone'],streaming=False,visible=False)
        own_transcription = gr.TextArea(lines=2, visible=False)

    data_subset.change(is_own, inputs=[data_subset], outputs=[own_audio, own_transcription])

    # Three model slots, each a dropdown with a Markdown card below it.
    with gr.Row():

        with gr.Column(scale=1):
            model_1 = gr.Dropdown(
                choices=["None","Model2","Model3"],
                label="Select Model 1"
            )
            model_1_card = gr.Markdown("")

        with gr.Column(scale=1):
            model_2 = gr.Dropdown(
                choices=["None","Model2","Model3"],
                label="Select Model 2"
            )
            model_2_card = gr.Markdown("")

        with gr.Column(scale=1):
            model_3 = gr.Dropdown(
                choices=["None","Model2","Model3"],
                label="Select Model 3"
            )
            model_3_card = gr.Markdown("")

    # Refresh a slot's card whenever its dropdown selection changes.
    model_1.change(get_card, inputs=model_1, outputs=model_1_card)
    model_2.change(get_card, inputs=model_2, outputs=model_2_card)
    model_3.change(get_card, inputs=model_3, outputs=model_3_card)

    eval_btn = gr.Button(
        value="Evaluate",
        variant="primary"
    )

    gr.Markdown("-------")
    gr.Markdown("### Results")

    results = gr.Markdown("")

    # Clicking the button writes the text returned by eval() into the results area.
    eval_btn.click(eval, [data_subset, model_1, model_2, model_3], results)

app.launch(debug=True)
|
createevalset.py
ADDED
|
File without changes
|
dataset.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from huggingface_hub import login
|
| 2 |
+
from datasets import load_dataset
|
| 3 |
+
from datasets import Audio
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
def get_credentials():
    """Return the ``token`` entry stored in ``credentials.json``.

    The file is opened relative to the current working directory and is
    expected to contain a JSON object with a ``"token"`` key.

    Returns:
        The value stored under ``"token"``.

    Raises:
        FileNotFoundError: If ``credentials.json`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the JSON object has no ``"token"`` key.
    """
    # JSON is UTF-8 by specification; an explicit encoding keeps the read
    # independent of the platform's locale default.
    with open("credentials.json", "r", encoding="utf-8") as f:
        credentials = json.load(f)
    return credentials['token']
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class Dataset:
    """Container for the dataset option names and the ``n`` setting.

    The constructor currently only stores configuration. Logging in to the
    Hugging Face Hub and loading the datasets are disabled (see the
    commented-out calls in ``__init__``).
    """

    def __init__(self, n:int=100):
        """Store ``n`` and the list of option names.

        Args:
            n: Stored on the instance as ``self.n``; presumably the number
                of samples to use -- TODO confirm.
        """
        # Disabled for now -- log the client into HF:
        #   login(token=get_credentials())
        #
        # Disabled for now -- load the datasets in streaming mode:
        #   self.librispeech_clean = load_dataset("librispeech_asr", "all", split="test.clean", streaming=True)
        #   self.librispeech_other = load_dataset("librispeech_asr", "all", split="test.other", streaming=True)

        self.options = ['librisspeech_clean','librisspeech_other']
        self.n = n

    def get_option(self):
        """Return the stored list of option names (the list itself, not a copy)."""
        return self.options
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Module-level instance with default arguments; built whenever this module is imported.
dataset = Dataset()
|
eval.py
ADDED
|
File without changes
|
model.py
ADDED
|
File without changes
|